def load_author(response, author):
    """Yield one populated ``ItemLoader`` per author node found in *response*.

    Args:
        response: Object exposing ``xpath()``; it is queried for the author
            nodes and handed to every ``ItemLoader`` that is created.
        author: Mapping of XPath expressions. Keys read here are ``'auth'``
            (selects the author nodes), ``'fn'``, ``'ln'``, ``'email'``,
            ``'fid'`` and ``'href'`` (evaluated relative to each author
            node), plus ``'address'``, ``'vitae'`` and ``'avatar'``, which
            are ``%``-format templates filled with the extracted ``fid`` /
            ``href`` value before being evaluated.

    Yields:
        The ``ItemLoader`` (wrapping a fresh ``AuthorItem``) for each author.
        ``load_item()`` is not called here; that is left to the consumer.

    Raises:
        IndexError: If an author node has no first-name or last-name match.
        KeyError: If ``author`` lacks the ``'auth'``, ``'fn'`` or ``'ln'`` key.
    """
    # Errors that only mean "this optional field is unavailable for this
    # author": no XPath match (IndexError), selector key missing from the
    # mapping (KeyError), template/argument mismatch (TypeError) or a
    # malformed expression/format (ValueError). Anything else propagates,
    # as do KeyboardInterrupt / SystemExit / GeneratorExit.
    optional_field_errors = (IndexError, KeyError, TypeError, ValueError)

    for auth in response.xpath(author['auth']):
        loader = ItemLoader(item=AuthorItem(), response=response)
        # The attribute name must be spelled exactly like this, otherwise
        # the loader never sees the processor and TakeFirst is not applied.
        loader.default_output_processor = TakeFirst()

        # First and last name are mandatory: a missing match is allowed to
        # raise rather than yielding a nameless author.
        fn = auth.xpath(author['fn']).extract()[0]
        ln = auth.xpath(author['ln']).extract()[0]
        loader.add_value('fname', fn)
        loader.add_value('lname', ln)

        # Email (optional). The first 7 characters are dropped --
        # presumably a "mailto:" prefix; confirm against the target markup.
        try:
            email = auth.xpath(author['email']).extract()[0][7:]
            loader.add_value('email', email)
        except optional_field_errors:
            pass

        # Address and institution (optional). The leading character of the
        # footnote id is dropped -- presumably a "#"; confirm.
        try:
            fid = auth.xpath(author['fid']).extract()[0][1:]
            address = loader.get_xpath(author['address'] % fid)
            # Resolve the institution per author so that a value found for
            # an earlier author can never be attached to a later one.
            institution = next(
                (part for part in address[0].split(', ')
                 if 'niversity' in part),
                None,
            )
            loader.add_value('address', address)
            if institution is not None:
                loader.add_value('institution', institution)
        except optional_field_errors:
            pass

        # Vitae and avatar share the same anchor, so extract it only once.
        # Its leading character is dropped -- presumably a "#"; confirm.
        try:
            href = auth.xpath(author['href']).extract()[0][1:]
        except optional_field_errors:
            href = None

        if href is not None:
            # Vitae (optional), prefixed with the author's full name.
            try:
                vitae = response.xpath(author['vitae'] % href).extract()[0]
                loader.add_value('vitae', fn + ' ' + ln + vitae)
            except optional_field_errors:
                pass

            # Avatar (optional).
            try:
                avatar = response.xpath(author['avatar'] % href).extract()[0]
                loader.add_value('avatar', avatar)
            except optional_field_errors:
                pass

        yield loader