def merge_items(self):
    """Return the merged items re-wrapped as actress items.

    Every mapping produced by ``items.merge_items(self)`` is copied, key by
    key, into a fresh ``self._actress_item_cls()`` instance.

    Returns:
        list: one actress item per merged item, in the order produced.
    """
    def _as_actress_item(source):
        # Copy every field of the merged item into a new actress item.
        target = self._actress_item_cls()
        for field, content in source.iteritems():
            target[field] = content
        return target

    return [_as_actress_item(merged) for merged in items.merge_items(self)]
def _to_sqlalchemy(self, item):
    """Export every merged item through the parent exporter.

    ``merge_items(item)`` is evaluated once and each element of the result
    is passed to the parent class's ``_to_sqlalchemy``.

    Raises:
        ValueError: if ``merge_items(item)`` returns an empty (falsy) result.
    """
    merged_items = merge_items(item)
    if not merged_items:
        # The original carried a commented-out alternative that raised
        # scrapy's CloseSpider here instead of ValueError.
        raise ValueError('Item list is empty')

    for merged in merged_items:
        super(SAHeyzoNewTitleItemExporter, self)._to_sqlalchemy(merged)
def _to_sqlalchemy(self, item):
    """Make sure an actress row exists for every merged item.

    For each element of ``merge_items(item)`` the ``self._actress_cls`` table
    is queried by ``name``. When nothing is found, a new ``self._actress_cls``
    instance is created and linked to the object returned by
    ``self._get_ev_actress``. The actress is then added to the module-level
    ``session`` and the session is committed.
    """
    for merged in merge_items(item):
        by_name = session.query(self._actress_cls).filter_by(name=merged['name'])
        actress = by_name.first()

        if not actress:
            # NOTE(review): the new row is only linked to its ev_actress; no
            # name is assigned here (name assignments were commented out in
            # the original) -- confirm the name filter above can match it.
            actress = self._actress_cls()
            actress.ev_actress = self._get_ev_actress(merged)

        # NOTE(review): the source this was restored from had lost its
        # indentation; add/commit are placed at loop level (once per merged
        # item) -- confirm against the intended nesting.
        session.add(actress)
        session.commit()
def _merge_items(self):
    """Normalise ``self._item`` into per-field values and merge them.

    Keys of ``self._item`` are rewritten as follows:

    * ``actress_names`` -> ``actress_name``: each name stripped of
      surrounding whitespace.
    * ``actress_name_kana`` -> ``actress_name_hiragana`` and
      ``actress_name_katakana``: each entry converted with ``cnvk``.
    * ``actress_name_index`` -> ``actress_name_index_hiragana`` and
      ``actress_name_index_katakana``: the single index value (converted
      when truthy, passed through unchanged otherwise) repeated once per
      entry in ``self._item['actress_names']``.
    * ``image_urls`` -> ``image_name``: ``<sha1 hex digest of the url>.jpg``.
    * every other key is copied unchanged.

    Returns:
        Whatever ``merge_items`` returns for the normalised mapping.

    Raises:
        KeyError: if ``actress_name_index`` is present but ``actress_names``
            is not.
    """
    normalised = {}
    for key, value in self._item.iteritems():
        if key == 'actress_names':
            normalised['actress_name'] = [name.strip() for name in value]
        elif key == 'actress_name_kana':
            normalised['actress_name_hiragana'] = [
                cnvk.convert(kana, cnvk.KATA2HIRA) for kana in value
            ]
            normalised['actress_name_katakana'] = [
                cnvk.convert(kana, cnvk.HIRA2KATA) for kana in value
            ]
        elif key == 'actress_name_index':
            # The index is a single value shared by all names of this item,
            # so it is replicated to line up with the per-name lists.
            name_count = len(self._item['actress_names'])
            if value:
                # NOTE(review): the index is decoded from UTF-8 before
                # conversion while the kana entries above are not -- confirm
                # the two inputs really differ in type.
                decoded = value.decode('utf-8')
                hiragana = cnvk.convert(decoded, cnvk.KATA2HIRA)
                katakana = cnvk.convert(decoded, cnvk.HIRA2KATA)
            else:
                hiragana = katakana = value
            # Build two independent lists so that mutating one key's list
            # can never silently change the other.
            normalised['actress_name_index_hiragana'] = [hiragana] * name_count
            normalised['actress_name_index_katakana'] = [katakana] * name_count
        elif key == 'image_urls':
            normalised['image_name'] = [
                '%s.jpg' % hashlib.sha1(url).hexdigest() for url in value
            ]
        else:
            normalised[key] = value
    return merge_items(normalised)
def _to_sqlalchemy(self, item):
    """Export every merged item through the parent exporter.

    Each element of ``merge_items(item)`` is passed, in order, to the parent
    class's ``_to_sqlalchemy``. An empty result exports nothing.
    """
    for merged in merge_items(item):
        super(SAOmanyNewTitleItemExporter, self)._to_sqlalchemy(merged)
def _to_sqlalchemy(self, item):
    """Ensure each merged item has an actress row with its image attached.

    For every element of ``merge_items(item)``:

    1. resolve the ev_actress via ``self._get_ev_actress``;
    2. look up the actress joined to an ev_actress with the item's ``name``,
       creating one linked to the resolved ev_actress when none exists;
    3. fetch the ev_actress image named ``item['image']`` for that ev_actress
       with ``one()`` (per SQLAlchemy, this raises unless exactly one row
       matches);
    4. if no actress image references that ev image yet, create one and
       append it to ``actress.images``;
    5. add the actress to the module-level ``session`` and commit.
    """
    for merged in merge_items(item):
        ev_actress = self._get_ev_actress(merged)

        actress_query = (
            session.query(self._actress_cls)
            .join(self._ev_actress_cls)
            .filter(self._ev_actress_cls.name == merged['name'])
        )
        actress = actress_query.first()
        if not actress:
            actress = self._actress_cls()
            actress.ev_actress = ev_actress

        ev_image_query = (
            session.query(self._ev_actress_image_cls)
            .filter_by(name=merged['image'])
            .filter_by(ev_actress_id=ev_actress.ev_actress_id)
        )
        ev_actress_image = ev_image_query.one()

        image_query = session.query(self._actress_image_cls).filter_by(
            ev_actress_image_id=ev_actress_image.ev_actress_image_id
        )
        actress_image = image_query.first()

        # An actress name can be duplicated on the site, in which case there
        # is more than one image for that name: different actresses sharing a
        # name have different face images, so the image is added.
        #
        # If the actress image already exists it is assumed to be assigned to
        # the actress already, so nothing is done.
        if not actress_image:
            actress_image = self._actress_image_cls()
            actress_image.ev_actress_image = ev_actress_image
            actress.images.append(actress_image)

        # NOTE(review): the source this was restored from had lost its
        # indentation; the image handling and add/commit are placed at loop
        # level, as the comments above imply -- confirm.
        session.add(actress)
        session.commit()