class TextBase(BaseField):
    ''' Virtual class common to Text and Entry '''

    def setup(self, entry):
        ''' Run the base-class setup, then protect a leading '@'.

        A displayed string that starts with '@' gets a blank put in
        front of it (presumably so that update_content() does not take
        unchanged plain text for a native string -- it strips leading
        blanks again before storing). '''
        BaseField.setup(self, entry)

        if self.string and self.string[0] == '@':
            self.string = ' ' + self.string
        return

    def update_content(self, entry, text):
        ''' Store the edited text in entry under self.field.

        Text starting with '@' is handed to entry.set_native() (minus
        the '@' and any following blanks) when the entry provides that
        method; everything else is stored as a Fields.Text with leading
        blanks removed.

        Returns 1 when the entry has been updated, and -1 when the
        native string could not be parsed (the error is reported in a
        dialog and the entry is left alone). '''
        # startswith() rather than text[0]: an empty text must not
        # raise IndexError, it is simply stored as an empty Text.
        if text.startswith('@') and hasattr(entry, 'set_native'):
            try:
                entry.set_native(self.field, text[1:].lstrip())
            except Exceptions.ParserError as msg:
                Utils.error_dialog(_("Error in native string parsing"),
                                   str(msg))
                return -1
            return 1

        text = text.lstrip()
        entry[self.field] = Fields.Text(text)
        return 1
def next(self):
    ''' Read the next tagged record from self.file.

    Blank lines in front of the record are skipped.  If the end of the
    file is reached while skipping, the (empty) table of raw fields is
    returned.  Otherwise the record is read up to the next blank line
    or the end of the file and returned as a Base.Entry of type
    'article'. '''
    tag = None
    value = ''
    table = {}

    # Skip whitespace
    while True:
        line = self.file.readline()
        if line == '':
            return table
        line = line.rstrip()
        if line != '':
            break

    # Gather "tag / value" lines; lines matching contin extend the
    # value of the tag currently being read.
    while True:
        head = header.match(line)
        if head:
            if tag:
                table.setdefault(tag, []).append(value)
            tag = head.group(1).strip()
            value = head.group(2)
        else:
            cont = contin.match(line)
            if cont:
                value = value + ' ' + cont.group(1)

        line = self.file.readline()
        if line == '':
            break
        line = line.rstrip()
        if line == '':
            break

    # don't forget the last item
    if tag:
        table.setdefault(tag, []).append(value)

    # create the entry with the actual fields
    norm = {}
    entry_type = Types.get_entry('article')

    if 'PMID' in table:
        pmid = table['PMID'][0]
        norm['url'] = Fields.URL(medurl + pmid)
        norm['medline-pmid'] = Fields.Text(pmid)
        del table['PMID']

    if 'UI' in table:
        norm[one_to_one['UI']] = Fields.Text(table['UI'][0])
        del table['UI']

    if 'AU' in table:
        group = Fields.AuthorGroup()

        for au in table['AU']:
            # analyze the author by ourself.
            first, last, lineage = [], [], []

            for part in au.split(' '):
                if part.isupper() and last:
                    # in upper-case after a last name: a first name
                    first.append(part)
                elif part.isupper() or not first:
                    # without a last name there can't be a first name,
                    # and anything before the first name belongs to
                    # the last name
                    last.append(part)
                else:
                    # there was a first name, this must be a lineage
                    lineage.append(part)

            if len(first) > 1:
                print("medline: long first name found. skipping.")
                first = first[0:1]

            given = family = suffix = None
            if first:
                # one initial per character of the first-name token
                given = '. '.join(first[0]) + '.'
            if last:
                family = ' '.join(last)
            if lineage:
                suffix = ' '.join(lineage)

            group.append(Fields.Author((None, given, family, suffix)))

        norm[one_to_one['AU']] = group
        del table['AU']

    if 'DP' in table:
        # only the part before the first blank is used for the date
        norm[one_to_one['DP']] = Fields.Date(table['DP'][0].split(' ')[0])
        del table['DP']

    # The simple fields...
    for f in table:
        f_mapped = one_to_one.get(f, 'medline-%s' % (f.lower()))
        text_type = Types.get_field(f_mapped).type
        norm[f_mapped] = text_type(" ; ".join(table[f]))

    return Base.Entry(None, entry_type, norm)
def next(self):
    ''' Read the next record from self.file and return it.

    The lines in front of the record are matched against xheader: the
    'Date:', 'Notes:', 'FN' and 'VR' values are used to fill
    self.isifileinfo and self.isifileformat (only while these are
    still empty), and every line consumed by this scan, except the one
    that ends it, is appended to self.extraneous.

    Returns the empty dictionary of raw fields when the end of the
    file is reached before a record starts, otherwise a Base.Entry of
    type 'article' built from the fields of the record. '''
    lines = {}
    in_table = {}
    file_notes, file_time, file_version, file_format = ('', '', '', '')

    while 1:
        line = self.file.readline()
        if line == '':
            return lines  # what does that mean ??
        head = xheader.match(line)
        if not head:
            pass
        elif head.group(1) == 'Date:':
            # rfc822.parsedate() returns None for a date it cannot
            # parse; keep the empty default instead of crashing.
            parsed = rfc822.parsedate(head.group(2))
            if parsed:
                file_time = time.strftime("%Y-%m-%d %H:%M", parsed)
        elif head.group(1) == 'Notes:':
            file_notes = head.group(2).strip()
        elif head.group(1) == 'FN':
            file_format = head.group(2)
        elif head.group(1) == 'VR':
            file_version = head.group(2)
        elif len(head.group(1)) == 2:
            # any other two-letter tag: the record starts here
            break
        self.extraneous.append(line)

    self.isifileformat = self.isifileformat or "Isifile format %s(%s)" % (
        file_format, file_version)
    self.isifileinfo = self.isifileinfo or "ISI %s (%s) %s" % (
        file_time, file_notes, login_name)

    # Collect the raw fields up to the 'ER' tag.  The first pass uses
    # the match that ended the scan above, so key is always bound
    # before a continuation line can be met.
    while 1:
        if line == 'ER':
            break
        if head:
            key = head.group(1)
            if key == 'ER':
                break
            lines.setdefault(key, []).append(head.group(3))
        else:
            cont = contin.match(line)
            if not cont:
                break
            lines[key].append(cont.group(1))
        line = self.file.readline()
        if line == '':
            break  # error situation
        head = header.match(line)

    key = 'PT'
    if key in lines:
        # startswith() so that a blank PT value gives the warning
        # below instead of an IndexError
        if lines[key][0].strip().startswith('J'):
            del lines[key]
        else:
            print('Warning: Unknown type of entry (%s) -- may need editing.'
                  % (lines[key],))

    entry_type = Types.get_entry('article')

    for key, target in (('AU', 'author'), ('ED', 'editor')):
        if key not in lines:
            continue
        group = Fields.AuthorGroup()
        for item in lines[key]:
            if item.strip() == '[Anon]' or ',' not in item:
                # No "name, initials" structure to take apart (e.g. a
                # name without comma): pass the text through as is.
                auth = [item]
            else:
                # Split on the first comma only, so that an extra
                # comma does not abort the import with a ValueError.
                name, firstn = item.split(',', 1)
                auth = ["%s, " % name]
                for initial in firstn.strip():
                    auth.append("%s. " % initial)
            group.append(Fields.Author("".join(auth)))
        in_table[target] = group
        del lines[key]

    key, key1, key2 = 'PG', 'BP', 'EP'
    if key1 in lines and key2 in lines:
        if len(lines[key1]) == len(lines[key2]):
            pages = ['%s -- %s' % pair
                     for pair in zip(lines[key1], lines[key2])]
            in_table['pages'] = Fields.Text('; '.join(pages))
            del lines[key1]
            del lines[key2]
        else:
            print('inconsistent BP, EP fields found')

    if key in lines:
        in_table['size'] = Fields.Text('%s p.' % (lines[key][0],))
        del lines[key]

    key = 'PY'
    if key in lines:
        in_table['date'] = Fields.Date(lines[key][0])
        del lines[key]

    key = 'ID'
    if key in lines:
        # the ID values are folded into the DE field, lower-cased
        val = "[ISI:] %s ;;" % (' '.join(lines[key]).lower())
        if 'DE' in lines:
            lines['DE'].append('; ' + val)
        else:
            lines['DE'] = [val]
        del lines[key]

    # journal titles come in various forms
    if 'SO' in lines:
        uc_title = ' '.join(lines['SO'])
        in_table['journal'] = Fields.Text(uc_title)
        if 'JI' in lines:
            # The capturing group keeps the separators, so joining
            # the pieces again rebuilds the full title.
            words = re.split(r"([- .,/]+)", uc_title)
            abbrevs = re.split(r"[- .,/]+", ' '.join(lines['JI']))
            pos, title = 0, []
            for word in words:
                capitalized = word.capitalize()
                if pos >= len(abbrevs):
                    # every abbreviated word has been used up: the
                    # rest is lower-cased (indexing abbrevs here used
                    # to raise an IndexError)
                    title.append(word.lower())
                elif word == abbrevs[pos]:
                    title.append(word)
                    pos += 1
                elif capitalized.startswith(abbrevs[pos]):
                    title.append(capitalized)
                    pos += 1
                else:
                    title.append(word.lower())
            del lines['JI']
            in_table['journal'] = Fields.Text("".join(title))
        del lines['SO']

    # everything left is stored through key_map, or under a generic
    # 'isifile-xx' name when the tag is not mapped
    for key in lines:
        mapped_key, joiner = key_map.get(
            key, ('isifile-%s' % (key.lower()), ' ; '))
        text_type = Types.get_field(mapped_key).type
        in_table[mapped_key] = text_type(joiner.join(lines[key]))

    return Base.Entry(None, entry_type, in_table)
def next(self):
    ''' Read the next record from self.file and return it as an entry.

    Lines are read up to the end of the file or up to a line matching
    separator_re.  A line that is empty or starts with a blank
    continues the text of the current field; any other line is taken
    (lower-cased) as the name of a new field.

    Returns None when no field has been read, otherwise a Base.Entry
    of type self.deftype filled according to self.mapping.  Fields
    whose name is not in self.mapping are ignored. '''
    fields = {}

    # read entry till next blank line
    text = []
    field = ''
    while 1:
        line = self.file.readline()
        if line == '':
            break
        line = line.rstrip()

        # starting with a blank ?
        if line == '' or line[0] == ' ':
            # ...then we continue the current text
            text.append(line.lstrip())
            continue

        # new entry ?
        if separator_re.match(line):
            break

        # else, this is a new field
        if field:
            # save the previous one if needed
            fields[field] = '\n'.join(text)
            text = []

        # store the name of this new field
        field = line.lower()

    # don't waste the last field content
    if field:
        fields[field] = '\n'.join(text)

    # did we parse a field ?
    if len(fields) == 0:
        return None

    # create the entry content
    entry = Base.Entry(type=self.deftype)

    for key in fields.keys():
        # self.mapping and entry are supplied by the project: their
        # has_key() interface is kept as is.
        if not self.mapping.has_key(key):
            continue

        (name, kind) = self.mapping[key]
        text_type = Types.get_field(name).type

        # parse a simple text field
        if kind == SimpleField:
            entry[name] = text_type(fields[key].strip())

        elif kind == KeywordField:
            # several source fields may feed the same entry field:
            # append to what is already there
            keywords = fields[key].strip()
            if entry.has_key(name):
                keywords = str(entry[name]) + '\n ' + keywords
            entry[name] = text_type(keywords)

        # parse an author field
        elif kind == AuthorField:
            entry[name] = self.parse_author(fields[key])

        # parse a source field
        elif kind == SourceField:
            dict_key = ' '.join(fields[key].split('\n'))
            m = self.source_re.match(dict_key.strip())
            if m:
                year, month, day = None, None, None
                j, v, s, n, p, o, y, d = m.group(
                    'journal', 'volume', 'inseries', 'number',
                    'pages', 'other', 'year', 'month')

                if s:
                    # article in a monograph series
                    entry['booktitle'] = Fields.Text(j)
                    if d:
                        entry['beigabevermerk'] = Fields.LongText(d)
                    entry.type = Types.get_entry('incollection')
                elif j:
                    entry['journal'] = Fields.Text(j)
                    if d and not d.isspace():
                        dates = d.split()
                        try:
                            month = long_month[dates[0]]
                        except KeyError:
                            # a month name that is not in long_month
                            # is dropped, the month stays unset
                            pass
                        if len(dates) > 1:
                            try:
                                day = int(dates[1])
                            except ValueError:
                                # Not a plain day number: leave the
                                # day unset, as for an unknown month,
                                # rather than abort the whole import.
                                pass

                if v:
                    entry['volume'] = Fields.Text(v)
                if n:
                    entry['number'] = Fields.Text(n)
                if p:
                    entry['pages'] = Fields.Text(p)
                if o:
                    entry['other-note'] = Fields.Text(o)
                if y:
                    year = int(y)

                entry['date'] = Fields.Date((year, month, day))
            else:
                print('>>> Error: Source field does not parse correctly:')
                print(dict_key)
                print(entry)

    return entry