def _to_sqlalchemy(self, item):
    """Store the categories listed in ``item``, creating the missing ones.

    ``item`` maps a category type's ``name_en`` to a list of values.  Each
    value is either a ``unicode`` category name or a mapping-like object
    that provides ``['name']`` and ``to_category()``.

    The category type lookup uses ``Query.one()`` and therefore raises when
    it does not match exactly one row.
    """
    for type_name, values in item.iteritems():
        category_type = (session.query(self._category_type_cls)
                         .filter_by(name_en=type_name)
                         .one())
        ev_category_type_id = category_type.ev_category_type_id
        for value in values:
            is_plain_name = isinstance(value, unicode)
            name = value if is_plain_name else value['name']
            category = (session.query(self._category_cls)
                        .filter_by(name=name)
                        .filter_by(category_type_id=category_type.category_type_id)
                        .first())
            if not category:
                # Unknown name within this category type: build the row and
                # link it to its category type and EV category.
                if is_plain_name:
                    category = self._category_cls(value)
                else:
                    category = self._category_cls(**value.to_category())
                ev_category = self._get_ev_category(session, name, ev_category_type_id)
                category.category_type = category_type
                category.ev_category = ev_category
            session.add(category)
            session.commit()
def _to_sqlalchemy(self, item):
    """Attach the category found at ``item['category_url']`` to its titles.

    Every entry of ``item['title_description']`` is matched as a prefix
    (``LIKE '<entry>%'``) of the joined EV title description.  Entries that
    match no title are skipped.

    Raises:
        MultipleResultsFound: when one description prefix matches several
            titles; the UTF-8 encoded prefix is the exception argument.
    """
    category = (session.query(self._ippondo_category)
                .filter_by(original_url=item['category_url'])
                .one())
    for td in item['title_description']:
        # Retrieve a title from the title description.
        title_query = (session.query(self._ippondo_title)
                       .join(self._ev_title_cls)
                       .filter(self._ev_title_cls.description.like('%s%%' % td)))
        try:
            title = title_query.one()
        except NoResultFound:
            continue
        except MultipleResultsFound:
            raise MultipleResultsFound(td.encode('utf-8'))

        # Link the category on both the EV title and the site title, but
        # only where it is not linked yet.
        if category.ev_category.name not in title.ev_title.categories_ap:
            title.ev_title.categories.append(category.ev_category)
        if category.name not in title.categories_ap:
            title.categories.append(category)
        session.add(title)
        session.commit()
def _to_sqlalchemy(self, item):
    """Store the categories listed in ``item``, creating the missing ones.

    ``item`` maps a category type's ``name_en`` to a list of category names.
    A category is looked up by name within its category type; when absent
    it is created and linked to the category type and to the EV category
    returned by ``_get_ev_category``.
    """
    for type_name, names in item.iteritems():
        category_type = (session.query(self._category_type_cls)
                         .filter_by(name_en=type_name)
                         .one())
        ev_category_type_id = category_type.ev_category_type_id
        for name in names:
            category = (session.query(self._category_cls)
                        .filter_by(name=name)
                        .filter_by(category_type_id=category_type.category_type_id)
                        .first())
            if not category:
                category = self._category_cls(name)
                category.category_type = category_type
                category.ev_category = self._get_ev_category(
                    session, name, ev_category_type_id)
            session.add(category)
            session.commit()
def _to_sqlalchemy(self, item):
    """Insert ``item`` as a new-title row unless its key is already stored.

    The row is looked up by the column named ``self._key``; when no row
    matches, one is built from the item's fields as keyword arguments.
    """
    lookup = {self._key: item[self._key]}
    existing = session.query(self._new_title_cls).filter_by(**lookup).first()
    new_title = existing or self._new_title_cls(**item)
    session.add(new_title)
    session.commit()
def _to_sqlalchemy(self, item):
    """Persist a site actress, with its image, for each merged entry.

    For every entry yielded by ``item.merge_items()`` the EV actress is
    obtained through ``_get_ev_actress``; the site actress referencing it
    is fetched or created, and the entry's image is attached when its name
    is not already in ``actress.images_ap``.
    """
    for entry in item.merge_items():
        ev_actress = self._get_ev_actress(entry)
        try:
            actress = (session.query(self._actress_cls)
                       .filter_by(ev_actress_id=ev_actress.ev_actress_id)
                       .one())
        except NoResultFound:
            actress = self._actress_cls()
            actress.ev_actress = ev_actress

        if entry['image'] not in actress.images_ap:
            ev_actress_image = (session.query(self._ev_actress_image_cls)
                                .filter_by(name=entry['image'])
                                .filter_by(ev_actress_id=ev_actress.ev_actress_id)
                                .one())
            actress_image = self._actress_image_cls()
            actress_image.ev_actress_image = ev_actress_image
            # An actress name may be duplicated on the site; different
            # actresses sharing a name have different face images, so the
            # image is added to the same actress row.
            actress.images.append(actress_image)
        session.add(actress)
        session.commit()
def _get_ev_actress(self, item):
    """Return the EV actress named ``item['name']``, creating or updating it.

    A missing actress is built from ``item.to_insert()``.  An existing one
    is bulk-updated with ``item.to_update()`` (per the original note, to
    fill fields such as ``name_index_hiragana`` that are still null).  In
    both cases the item's image is attached when its name is not already in
    ``ev_actress.images_ap``.  The result is added and committed.
    """
    by_name = session.query(self._ev_actress_cls).filter_by(name=item['name'])
    try:
        ev_actress = by_name.one()
    except NoResultFound:
        # No image is attached here: the images_ap check below covers the
        # freshly created actress as well.
        ev_actress = self._ev_actress_cls(**(item.to_insert()))
    else:
        by_name.update(item.to_update())

    if item['image']['name'] not in ev_actress.images_ap:
        ev_actress.images.append(
            self._ev_actress_image_cls(name=item['image']['name'],
                                       original_url=item['image']['original_url']))
    session.add(ev_actress)
    session.commit()
    return ev_actress
def _to_sqlalchemy(self, item):
    """Insert ``item`` as an XML feed row unless its key is already stored.

    The row is looked up by the column named ``self._key``; when no row
    matches, one is built from the item's fields as keyword arguments.
    """
    lookup = {self._key: item[self._key]}
    existing = session.query(self._xml_feed_item_cls).filter_by(**lookup).first()
    xml_feed_item = existing or self._xml_feed_item_cls(**item)
    session.add(xml_feed_item)
    session.commit()
def main():
    """Mark every new-title row created within the last week as scraped.

    Runs over each class in ``new_title_clsses`` and sets ``scraped`` to
    ``True`` on the rows whose ``created`` is later than one week ago.
    """
    for new_title_cls in new_title_clsses:
        one_week_ago = datetime.today() - timedelta(weeks=1)
        recent_titles = (session.query(new_title_cls)
                         .filter(new_title_cls.created > one_week_ago)
                         .all())
        for new_title in recent_titles:
            new_title.scraped = True
            session.add(new_title)
            session.commit()
def _get_ev_title(self, item):
    """Return the EV title for ``item['original_id']``, creating it if absent.

    A new EV title is built from ``self._to_insert(item)`` and linked to:

    * the EV category of every site category named in ``item['categories']``,
    * every EV actress named in ``item['actresses']``,
    * one EV title image per name in ``item['images']`` (a mapping of image
      type to a list of names), reusing an image row with the same name
      when one exists.

    Category or actress names that match no row are logged and skipped.
    """
    ev_title = session.query(self._ev_title_cls).filter_by(original_id=item['original_id']).first()
    if not ev_title:
        ev_categories = []
        for category_name in item['categories']:
            try:
                site_category = session.query(self._category_cls).filter_by(name=category_name).one()
            except NoResultFound as e:
                log.msg(e)
            else:
                ev_categories.append(site_category.ev_category)

        ev_actresses = []
        for actress_name in item['actresses']:
            try:
                ev_actress = session.query(self._ev_actress_cls).filter_by(name=actress_name).one()
            except NoResultFound as e:
                log.msg(e)
            else:
                ev_actresses.append(ev_actress)

        # Collect the images of all types into one flat list.  The previous
        # code rebound a dict of the same name to a list inside the loop and
        # then indexed it with the image type, which raised TypeError as
        # soon as item['images'] was non-empty.
        ev_title_images = []
        for image_type, names in item['images'].iteritems():
            for name in names:
                ev_title_image = session.query(self._ev_title_image_cls).filter_by(name=name).first()
                if not ev_title_image:
                    ev_title_image = self._ev_title_image_cls(name=name, image_type=image_type)
                ev_title_images.append(ev_title_image)

        ev_title = self._ev_title_cls(**(self._to_insert(item)))
        ev_title.categories = ev_categories
        ev_title.actresses = ev_actresses
        ev_title.images = ev_title_images
    session.add(ev_title)
    session.commit()
    return ev_title
def _to_sqlalchemy(self, item):
    """Create one category row per value listed in ``item``.

    ``item`` maps a category type's ``name_en`` to a list of category names.
    Each name is UTF-8 encoded, then used both to obtain the EV category
    (via ``_get_ev_category``) and to build the category row.  No lookup
    for an existing category is made.
    """
    for type_name, names in item.iteritems():
        category_type = (session.query(self._category_type_cls)
                         .filter_by(name_en=type_name)
                         .one())
        ev_category_type_id = category_type.ev_category_type_id
        for raw_name in names:
            encoded_name = raw_name.encode('utf-8')
            ev_category = self._get_ev_category(encoded_name, ev_category_type_id)
            category = self._category_cls(encoded_name)
            category.category_type = category_type
            category.ev_category = ev_category
            session.add(category)
            session.commit()
def _to_sqlalchemy(self, item):
    """Create the site title for ``item['original_id']`` if it is missing.

    A new title is built from ``self._to_insert(item)``, passed through
    ``_set_relationships``, linked to its EV title, and given its images,
    categories and actresses.  Category and actress names that match no
    row are skipped; each image name must match exactly one EV title image.
    """
    title = (session.query(self._title_cls)
             .join(self._ev_title_cls)
             .filter(self._ev_title_cls.original_id == item['original_id'])
             .first())
    if not title:
        # NOTE(review): the loops below carry no duplicate guard, so they
        # are assumed to belong to the creation branch -- confirm.
        title = self._title_cls(self._to_insert(item))
        title = self._set_relationships(item, title)
        title.ev_title = self._get_ev_title(item)

        for image_type, names in item['images'].iteritems():
            for name in names:
                title_image = self._title_image_cls()
                title_image.ev_title_image = (session.query(self._ev_title_image_cls)
                                              .filter_by(name=name)
                                              .one())
                title.images.append(title_image)

        for category_name in item['categories']:
            category = session.query(self._category_cls).filter_by(name=category_name).first()
            if category:
                title.categories.append(category)

        for actress_name in item['actresses']:
            actress = session.query(self._actress_cls).filter_by(name=actress_name).first()
            if actress:
                title.actresses.append(actress)
    session.add(title)
    session.commit()
def _to_sqlalchemy(self, item):
    """Persist a site actress for each entry of ``self._merge_items(item)``.

    An actress is looked up by ``actress_name``; when none exists a new one
    is created and linked to the EV actress from ``_get_ev_actress``.
    """
    for merged in self._merge_items(item):
        actress = (session.query(self._actress_cls)
                   .filter_by(name=merged['actress_name'])
                   .first())
        if not actress:
            actress = self._actress_cls()
            actress.ev_actress = self._get_ev_actress(merged)
        session.add(actress)
        session.commit()
def _get_ev_actress(self, item):
    """Return the EV actress named ``item['name']``, creating or updating it.

    An existing actress is bulk-updated with ``self._to_update(item)`` (per
    the original note, to fill fields other than name and image that are
    still null), and gets a new image row when ``item['image']`` is set and
    no image with that name exists.  A missing actress is built from
    ``item.to_insert()`` together with an image row for ``item['image']``.
    The result is added and committed.

    Leftover debugging ``print`` statements that dumped ``type(item)`` to
    stdout on every insert have been removed.
    """
    ev_actress_query = session.query(self._ev_actress_cls).filter_by(name=item['name'])
    ev_actress = ev_actress_query.first()
    if ev_actress:
        ev_actress_query.update(self._to_update(item))
        ev_actress_image = (session.query(self._ev_actress_image_cls)
                            .filter_by(name=item['image'])
                            .first())
        if not ev_actress_image and item['image']:
            ev_actress_image = self._ev_actress_image_cls(item['image'])
            ev_actress.images.append(ev_actress_image)
    else:
        ev_actress_image = self._ev_actress_image_cls(item['image'])
        ev_actress = self._ev_actress_cls(**(item.to_insert()))
        ev_actress.images.append(ev_actress_image)
    session.add(ev_actress)
    session.commit()
    return ev_actress
def _get_ev_category(self, category_name, ev_category_type_id):
    """Return the EV category matching ``category_name``, creating it if needed.

    Lookup order:

    1. the first EV category whose name contains ``category_name``;
    2. the first EV category whose name is a substring of ``category_name``;
    3. otherwise a new EV category, linked to the ``EVCategoryType`` with
       ``category_type_id == ev_category_type_id``, added and committed.

    Returns the EV category object (not its id).
    """
    ev_category_cls = self._ev_category_cls
    containing = (session.query(ev_category_cls)
                  .filter(ev_category_cls.name.contains(category_name))
                  .first())
    if containing:
        return containing

    for candidate in session.query(ev_category_cls).all():
        if candidate.name in category_name:
            return candidate

    ev_category_type = (session.query(EVCategoryType)
                        .filter_by(category_type_id=ev_category_type_id)
                        .one())
    created = ev_category_cls(category_name)
    created.category_types.append(ev_category_type)
    session.add(created)
    session.commit()
    return created
def _get_ev_actress(self, item):
    """Return the EV actress named ``item['name']``, creating or updating it.

    A missing actress is built from ``item.to_insert()`` and given an image
    row made from ``item['image']`` (``name`` and ``original_url``).  An
    existing one is bulk-updated with ``item.to_update()`` and gets the
    image only when its name is not already in ``ev_actress.images_ap``.
    The result is added and committed.
    """
    by_name = session.query(self._ev_actress_cls).filter_by(name=item['name'])
    image = item['image']
    try:
        ev_actress = by_name.one()
    except NoResultFound:
        new_image = self._ev_actress_image_cls(name=image['name'],
                                               original_url=image['original_url'])
        ev_actress = self._ev_actress_cls(**(item.to_insert()))
        ev_actress.images.append(new_image)
    else:
        # Per the original note: fill fields (such as name_index_hiragana)
        # other than name and image that are null when the item has them.
        by_name.update(item.to_update())
        if image['name'] not in ev_actress.images_ap:
            ev_actress.images.append(
                self._ev_actress_image_cls(name=image['name'],
                                           original_url=image['original_url']))
    session.add(ev_actress)
    session.commit()
    return ev_actress
def _to_sqlalchemy(self, item):
    """Store the categories listed in ``item``, creating or completing them.

    ``item`` maps a category type's ``name_en`` to a list of values, each
    providing ``['name']`` and ``['original_url']``.  A missing category is
    created and linked to its category type and EV category; an existing
    one without an ``original_url`` gets the value's URL.
    """
    for type_name, values in item.iteritems():
        category_type = (session.query(self._category_type_cls)
                         .filter_by(name_en=type_name)
                         .one())
        ev_category_type_id = category_type.ev_category_type_id
        for value in values:
            category = (session.query(self._category_cls)
                        .filter_by(name=value['name'])
                        .filter_by(category_type_id=category_type.category_type_id)
                        .first())
            if not category:
                category = self._category_cls(value['name'], value['original_url'])
                category.category_type = category_type
                category.ev_category = self._get_ev_category(
                    session, value['name'], ev_category_type_id)
            elif not category.original_url:
                category.original_url = value['original_url']
            session.add(category)
            session.commit()
def _get_ev_category(self, category_name, ev_category_type_id):
    """Return the EV category matching ``category_name``, creating it if needed.

    Lookup order:

    1. unless ``category_name`` is a single character, the first EV
       category whose name contains ``category_name`` (a one-character name
       is not assumed to be contained in a longer category name);
    2. the first EV category whose name is a substring of ``category_name``;
    3. otherwise a new EV category named ``category_name``.

    In every case the ``EVCategoryType`` with
    ``category_type_id == ev_category_type_id`` is appended to the
    category's ``category_types`` and the category is added and committed.

    Returns the EV category object (not its id).
    """
    ev_category = None
    if len(category_name) != 1:
        ev_category = (session.query(self._ev_category_cls)
                       .filter(self._ev_category_cls.name.contains(category_name))
                       .first())

    if not ev_category:
        # Iterate with a separate name: reusing ``ev_category`` as the loop
        # variable left it bound to the last, non-matching row when nothing
        # matched, so that unrelated category was returned instead of a new
        # one being created.
        for candidate in session.query(self._ev_category_cls).all():
            if candidate.name in category_name:
                ev_category = candidate
                break
        else:
            ev_category = self._ev_category_cls(category_name)

    ev_category_type = (session.query(EVCategoryType)
                        .filter_by(category_type_id=ev_category_type_id)
                        .one())
    ev_category.category_types.append(ev_category_type)
    session.add(ev_category)
    session.commit()
    return ev_category
def _to_sqlalchemy(self, item):
    """Create the title for ``item['original_id']`` if it is not stored yet.

    A new title is built from the item's fields as keyword arguments and
    linked to its categories and actresses.  Each category name must match
    exactly one EV category (``Query.one()`` raises otherwise); an actress
    name with no EV actress row gets a new ``self._actress_cls`` instance.
    """
    title = (session.query(self._title_cls)
             .filter_by(original_id=item['original_id'])
             .first())
    if not title:
        categories = [
            session.query(self._ev_category_cls).filter_by(name=category_name).one()
            for category_name in item['categories']
        ]

        actresses = []
        for actress_name in item['actresses']:
            found = session.query(self._ev_actress_cls).filter_by(name=actress_name).first()
            actresses.append(found if found else self._actress_cls(actress_name))

        title = self._title_cls(**item)
        title.actresses = actresses
        title.categories = categories
    session.add(title)
    session.commit()
def _get_ev_actress(self, item):
    """Return the EV actress named ``item['name']``, creating or updating it.

    An existing actress is bulk-updated with ``item.to_update()`` (per the
    original note, to fill fields other than name and image that are still
    null), and gets a new image row when ``item['image']`` is set and no
    image with that name exists.  A missing actress is built from
    ``item.to_insert()`` together with an image row for ``item['image']``.
    The result is added and committed.
    """
    by_name = session.query(self._ev_actress_cls).filter_by(name=item['name'])
    ev_actress = by_name.first()
    if not ev_actress:
        new_image = self._ev_actress_image_cls(item['image'])
        ev_actress = self._ev_actress_cls(**(item.to_insert()))
        ev_actress.images.append(new_image)
    else:
        by_name.update(item.to_update())
        stored_image = (session.query(self._ev_actress_image_cls)
                        .filter_by(name=item['image'])
                        .first())
        if not stored_image and item['image']:
            ev_actress.images.append(self._ev_actress_image_cls(item['image']))
    session.add(ev_actress)
    session.commit()
    return ev_actress
def _to_sqlalchemy(self, item):
    """Persist a site actress, with its image, for each merged entry.

    For every entry yielded by ``item.merge_items()`` the EV actress is
    obtained through ``_get_ev_actress``; the site actress joined to an EV
    actress of that name is fetched or created, and a site image row is
    created for the entry's EV image unless one already references it.
    """
    for entry in item.merge_items():
        ev_actress = self._get_ev_actress(entry)
        actress = (session.query(self._actress_cls)
                   .join(self._ev_actress_cls)
                   .filter(self._ev_actress_cls.name == entry['name'])
                   .first())
        if not actress:
            actress = self._actress_cls()
            actress.ev_actress = ev_actress

        # TODO (from the original author): wrap this one() in try/except.
        ev_actress_image = (session.query(self._ev_actress_image_cls)
                            .filter_by(name=entry['image'])
                            .filter_by(ev_actress_id=ev_actress.ev_actress_id)
                            .one())
        actress_image = (session.query(self._actress_image_cls)
                         .filter_by(ev_actress_image_id=ev_actress_image.ev_actress_image_id)
                         .first())
        # An actress name may be duplicated on the site; different actresses
        # sharing a name have different face images, so the image is added.
        # An actress_image that already exists is assumed to be assigned to
        # the actress already, so nothing is done for it.
        if not actress_image:
            actress_image = self._actress_image_cls()
            actress_image.ev_actress_image = ev_actress_image
            actress.images.append(actress_image)
        session.add(actress)
        session.commit()
def _to_sqlalchemy(self, item):
    """Create or complete the site title for ``item['original_id']``.

    A missing title is built from ``item.to_title()`` and passed through
    ``_set_relationships``.  Categories, actresses (with their face images)
    and title images named in ``item`` are then attached wherever the
    corresponding ``*_ap`` collection does not list them yet.

    Raises:
        NoResultFound: when a category name in ``item['categories']``
            matches no site category, or when a ``Query.one()`` lookup
            (EV actress, EV actress image, EV title image) finds nothing.
    """
    title = (session.query(self._title_cls)
             .join(self._ev_title_cls)
             .filter(self._ev_title_cls.original_id == item['original_id'])
             .first())
    if not title:
        title = self._title_cls(**item.to_title())
        title = self._set_relationships(item, title)
    # NOTE(review): assumed to run for existing titles too, because the
    # actress lookup below relies on _get_ev_title having inserted the EV
    # actresses -- confirm against the original layout.
    ev_title = self._get_ev_title(item)
    title.ev_title = ev_title

    # Categories: skip those the title already has.
    for category_name in item['categories']:
        if category_name in title.categories_ap:
            continue
        category = (session.query(self._category_cls)
                    .filter_by(name=category_name)
                    .first())
        if not category:
            raise NoResultFound
        title.categories.append(category)

    # Actresses.
    if item['actresses']:
        for actress_name in item['actresses']:
            # The EV actress (and its image) should already be stored: it is
            # inserted by _get_ev_title when it does not exist.
            ev_actress = (session.query(self._ev_actress_cls)
                          .filter_by(name=actress_name)
                          .one())
            try:
                actress = (session.query(self._actress_cls)
                           .join(self._ev_actress_cls)
                           .filter(self._ev_actress_cls.name == actress_name)
                           .one())
            except NoResultFound:
                actress = self._actress_cls()
                actress.ev_actress = ev_actress

            # No need to check whether the EV actress image exists when the
            # image is not in actress.images_ap, because actress to
            # actress_image is one-to-many.
            for face_image in item['images']['face']:
                if face_image not in actress.images_ap:
                    ev_actress_image = (session.query(self._ev_actress_image_cls)
                                        .filter_by(ev_actress_id=ev_actress.ev_actress_id)
                                        .filter_by(name=face_image)
                                        .one())
                    actress_image = self._actress_image_cls()
                    actress_image.ev_actress_image = ev_actress_image
                    actress.images.append(actress_image)
            # Original author's open question: is this add necessary?
            session.add(actress)
            if actress_name not in title.actresses_ap:
                title.actresses.append(actress)

    # Title images: skip those the title already has.
    for image_type, names in item['images'].iteritems():
        for name in names:
            if name in title.images_ap:
                continue
            ev_title_image = (session.query(self._ev_title_image_cls)
                              .filter_by(name=name)
                              .filter_by(image_type=image_type)
                              .one())
            title_image = self._title_image_cls()
            title_image.ev_title_image = ev_title_image
            title.images.append(title_image)

    session.add(title)
    session.commit()
def _to_sqlalchemy(self, item):
    """Persist a site actress, with its image, for each merged entry.

    For every entry yielded by ``merge_items(item)`` the EV actress is
    obtained through ``_get_ev_actress``; the site actress joined to an EV
    actress of that name is fetched or created, and a site image row is
    created for the entry's EV image unless one already references it.
    """
    for merged in merge_items(item):
        ev_actress = self._get_ev_actress(merged)
        actress = (session.query(self._actress_cls)
                   .join(self._ev_actress_cls)
                   .filter(self._ev_actress_cls.name == merged['name'])
                   .first())
        if not actress:
            actress = self._actress_cls()
            actress.ev_actress = ev_actress

        ev_actress_image = (session.query(self._ev_actress_image_cls)
                            .filter_by(name=merged['image'])
                            .filter_by(ev_actress_id=ev_actress.ev_actress_id)
                            .one())
        actress_image = (session.query(self._actress_image_cls)
                         .filter_by(ev_actress_image_id=ev_actress_image.ev_actress_image_id)
                         .first())
        # An actress name may be duplicated on the site; different actresses
        # sharing a name have different face images, so the image is added.
        # An actress_image that already exists is assumed to be assigned to
        # the actress already, so nothing is done for it.
        if not actress_image:
            actress_image = self._actress_image_cls()
            actress_image.ev_actress_image = ev_actress_image
            actress.images.append(actress_image)
        session.add(actress)
        session.commit()
def _get_ev_title(self, item):
    """Return the EV title for ``item['original_id']``, creating it if absent.

    A new EV title is built from ``item.to_ev_title()`` and linked to:

    * the EV category of every site category named in ``item['categories']``
      (names matching no site category are logged and skipped),
    * every EV actress named in ``item['actresses']``; an unknown actress
      is logged and created together with an image row,
    * one EV title image per name in ``item['images']`` (a mapping of image
      type to a list of names), reusing an image row with the same name
      when one exists.

    The title is added and committed before being returned.
    """
    ev_title = session.query(self._ev_title_cls).filter_by(original_id=item['original_id']).first()
    if not ev_title:
        ev_title = self._ev_title_cls(**(item.to_ev_title()))

        ev_categories = []
        for category_name in item['categories']:
            # first() rather than one(): some sites (such as x1x.com) have
            # categories with the same name under different category types.
            site_category = session.query(self._category_cls).filter_by(name=category_name).first()
            if not site_category:
                # first() returns None instead of raising NoResultFound, so
                # the miss has to be tested explicitly; the former
                # try/except let None.ev_category raise AttributeError.
                log.msg('Category not found: %r' % (category_name,))
                continue
            ev_categories.append(site_category.ev_category)

        ev_actresses = []
        if item['actresses']:
            for actress_name in item['actresses']:
                try:
                    ev_actress = session.query(self._ev_actress_cls).filter_by(name=actress_name).one()
                except NoResultFound as e:
                    log.msg(e)
                    # Original author's open question: is this right?  How
                    # should ev_actress_image, actress and actress_image be
                    # handled?  => actress is fine; ev_actress_image is
                    # undecided.
                    ev_actress = self._ev_actress_cls(name=actress_name)
                    # NOTE(review): the whole face-image list is passed as
                    # one constructor argument -- confirm this is intended.
                    ev_actress_image = self._ev_actress_image_cls(item['images']['face'])
                    # The collection is ``images`` everywhere else in this
                    # file; ``image`` raised AttributeError here.
                    ev_actress.images.append(ev_actress_image)
                ev_actresses.append(ev_actress)

        ev_title_images = []
        for image_type, names in item['images'].iteritems():
            for name in names:
                ev_title_image = session.query(self._ev_title_image_cls).filter_by(name=name).first()
                if not ev_title_image:
                    ev_title_image = self._ev_title_image_cls(name=name, image_type=image_type)
                ev_title_images.append(ev_title_image)

        ev_title.categories = ev_categories
        ev_title.actresses = ev_actresses
        ev_title.images = ev_title_images
    session.add(ev_title)
    session.commit()
    return ev_title
def _get_ev_title(self, item):
    """Return the EV title for ``item['original_id']``, creating/completing it.

    A missing EV title is built from ``item.to_ev_title()``.  Categories,
    actresses (with the item's face images) and title images named in
    ``item`` are then attached wherever the corresponding ``*_ap``
    collection does not list them yet.  The title is added and committed.

    Raises:
        NoResultFound: when a category name matches no EV category, neither
            exactly nor (for names longer than one character) by
            containment.
    """
    ev_title = (session.query(self._ev_title_cls)
                .filter_by(original_id=item['original_id'])
                .first())
    if not ev_title:
        ev_title = self._ev_title_cls(**(item.to_ev_title()))

    # Categories: skip those the EV title already has.
    for category_name in item['categories']:
        if category_name in ev_title.categories_ap:
            continue
        try:
            ev_category = (session.query(self._ev_category_cls)
                           .filter_by(name=category_name)
                           .one())
        except NoResultFound:
            # Fall back to a containment match, lowest id first.  A
            # one-character name that reaches this point is simply not in
            # the database.
            ev_category = (
                session.query(self._ev_category_cls)
                .filter(self._ev_category_cls.name.contains(category_name))
                .order_by(self._ev_category_cls.ev_category_id)
                .first()
            ) if len(category_name) != 1 else None
            if not ev_category:
                raise NoResultFound
        ev_title.categories.append(ev_category)

    # Actresses.
    if item['actresses']:
        for actress_name in item['actresses']:
            try:
                ev_actress = (session.query(self._ev_actress_cls)
                              .filter_by(name=actress_name)
                              .one())
            except NoResultFound:
                ev_actress = self._ev_actress_cls(name=actress_name)
                actress = self._actress_cls()
                actress.ev_actress = ev_actress

            # No need to check whether the EV actress image exists when the
            # image is not in images_ap, because actress to actress_image
            # is one-to-many.
            for face_image in item['images']['face']:
                if face_image not in ev_actress.images_ap:
                    ev_actress.images.append(self._ev_actress_image_cls(face_image))
            # Original author's open question: is this add necessary?
            session.add(ev_actress)
            if actress_name not in ev_title.actresses_ap:
                ev_title.actresses.append(ev_actress)

    # Title images: skip those the EV title already has.
    for image_type, names in item['images'].iteritems():
        for name in names:
            if name not in ev_title.images_ap:
                ev_title.images.append(
                    self._ev_title_image_cls(name=name, image_type=image_type))

    session.add(ev_title)
    session.commit()
    return ev_title
def _to_sqlalchemy(self, item):
    """Store ``item`` as a site title linked to its ev_title.

    The site title is looked up through its ev_title's ``original_id`` and
    created from ``item.to_title()`` when missing. ``_set_relationships`` and
    ``_get_ev_title`` are then applied, after which site-side categories,
    actresses (with face images for newly created actresses) and title
    images are attached and the module-level ``session`` is committed.

    Args:
        item: scraped item read through the keys ``original_id``,
            ``categories``, ``actresses`` and ``images`` and the method
            ``to_title()``. Every image is read as a mapping with a
            ``'name'`` key; ``images`` must contain a ``'face'`` entry when a
            new actress has to be created.

    Raises:
        NoResultFound: a row expected to exist already (the ev_actress, or
            the ev_title_image of a title image) is missing.
    """
    title = session.query(self._title_cls).\
        join(self._ev_title_cls).\
        filter(self._ev_title_cls.original_id == item['original_id']).\
        first()
    if not title:
        title = self._title_cls(**item.to_title())
    title = self._set_relationships(item, title)

    ev_title = self._get_ev_title(item)
    title.ev_title = ev_title

    # categories
    if item['categories']:
        for c in item['categories']:
            # Do nothing if title already has the category.
            if c in title.categories_ap:
                continue
            category = session.query(self._category_cls).\
                filter_by(name=c).\
                first()
            if not category:
                # The same word may be stored in the other kana script.
                category = session.query(self._category_cls).\
                    filter((self._category_cls.name == cnvk.convert(c, cnvk.HIRA2KATA)) |
                           (self._category_cls.name == cnvk.convert(c, cnvk.KATA2HIRA))).\
                    first()
            # Unknown categories are skipped silently.
            if category:
                title.categories.append(category)

    # actresses
    if item['actresses']:
        for a in item['actresses']:
            # The ev_actress (and its images) is expected to exist because
            # _get_ev_title() inserts it when missing.
            ev_actress = session.query(self._ev_actress_cls).\
                filter_by(name=a).\
                one()
            try:
                actress = session.query(self._actress_cls).\
                    join(self._ev_actress_cls).\
                    filter(self._ev_actress_cls.name == a).\
                    one()
            except NoResultFound:
                actress = self._actress_cls()
                actress.ev_actress = ev_actress
                # Images are attached only when the actress is new.
                if item['images']['face']:
                    for image in item['images']['face']:
                        try:
                            ev_actress_image = session.query(self._ev_actress_image_cls).\
                                filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                                filter_by(name=image['name']).\
                                one()
                        except NoResultFound:
                            # Fall back to any image of this ev_actress.
                            ev_actress_image = session.query(self._ev_actress_image_cls).\
                                filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                                first()
                        if ev_actress_image is None:
                            # The ev_actress has no image at all; nothing to
                            # link (previously an AttributeError on None).
                            continue
                        if not ev_actress_image.site_actress_image:
                            actress_image = self._actress_image_cls()
                            actress_image.ev_actress_image = ev_actress_image
                        else:
                            actress_image = ev_actress_image.site_actress_image
                        actress.images.append(actress_image)
                session.add(actress)

            if a not in title.actresses_ap:
                title.actresses.append(actress)

    # title images
    for image_type, images in item['images'].iteritems():
        if not images:
            continue
        for image in images:
            if image['name'] in title.images_ap:
                continue
            ev_title_image = session.query(self._ev_title_image_cls).\
                filter_by(ev_title_id=ev_title.ev_title_id).\
                filter_by(name=image['name']).\
                filter_by(image_type=image_type).\
                one()
            title_image = self._title_image_cls()
            title_image.ev_title_image = ev_title_image
            title.images.append(title_image)

    session.add(title)
    session.commit()
def _get_ev_title(self, item):
    """Fetch or create the ev_title for ``item`` and attach its relations.

    The ev_title is looked up by ``item['original_id']``; when there is none,
    a new one is built from ``item.to_ev_title()``. Categories, actresses
    (with face images for newly created actresses) and title images named by
    ``item`` are attached when missing, then the module-level ``session`` is
    committed.

    Category names are matched in three steps: exact name, the same name in
    the other kana script, then a substring match over all three spellings.
    Names that resolve to nothing are skipped.

    Args:
        item: scraped item read through the keys ``original_id``,
            ``categories``, ``actresses`` and ``images`` and the method
            ``to_ev_title()``. Every image is read as a mapping with
            ``'name'`` and ``'original_url'`` keys; ``images`` must contain a
            ``'face'`` entry when a new actress has to be created.

    Returns:
        The ev_title object that was added to the session and committed.
    """
    ev_title = session.query(self._ev_title_cls).\
        filter_by(original_id=item['original_id']).\
        first()
    if not ev_title:
        ev_title = self._ev_title_cls(**(item.to_ev_title()))

    # categories
    if item['categories']:
        for category_name in item['categories']:
            katakana = cnvk.convert(category_name, cnvk.HIRA2KATA)
            hiragana = cnvk.convert(category_name, cnvk.KATA2HIRA)

            # Do nothing if ev_title already has the category under any of
            # its kana spellings. (Chaining the three ``not in`` tests with
            # ``or`` was true whenever a single spelling was missing, so
            # attached categories were appended again.)
            attached = ev_title.categories_ap
            if (category_name in attached or katakana in attached
                    or hiragana in attached):
                continue

            try:
                ev_category = session.query(self._ev_category_cls).\
                    filter_by(name=category_name).\
                    one()
            except NoResultFound:
                ev_category = None
                # A one-character name without an exact match is treated as
                # absent from the database; no fuzzy lookup is attempted.
                if len(category_name) != 1:
                    # The same word may be stored in the other kana script.
                    # The comparison must be against the category name
                    # column, not against the actress class.
                    ev_category = session.query(self._ev_category_cls).\
                        filter((self._ev_category_cls.name == katakana) |
                               (self._ev_category_cls.name == hiragana)).\
                        first()
                    if not ev_category:
                        ev_category = session.query(self._ev_category_cls).\
                            filter((self._ev_category_cls.name.contains(category_name)) |
                                   (self._ev_category_cls.name.contains(katakana)) |
                                   (self._ev_category_cls.name.contains(hiragana))).\
                            order_by(self._ev_category_cls.ev_category_id).\
                            first()

            # The substring match can resolve to a category that is already
            # attached under another name, hence the membership test.
            if ev_category and ev_category not in ev_title.categories:
                ev_title.categories.append(ev_category)

    # actresses
    if item['actresses']:
        for actress_name in item['actresses']:
            try:
                ev_actress = session.query(self._ev_actress_cls).\
                    filter_by(name=actress_name).\
                    one()
            except NoResultFound:
                ev_actress = self._ev_actress_cls(name=actress_name)
                # Face images are added only for a newly created actress, so
                # no duplicate check is needed.
                if item['images']['face']:
                    for image in item['images']['face']:
                        ev_actress.images.append(
                            self._ev_actress_image_cls(
                                name=image['name'],
                                original_url=image['original_url']))

            # Added explicitly so the actress is part of the next commit even
            # when it is not appended to ev_title below.
            session.add(ev_actress)
            if actress_name not in ev_title.actresses_ap:
                ev_title.actresses.append(ev_actress)

    # title images
    for image_type, images in item['images'].iteritems():
        if not images:
            continue
        for image in images:
            # Do nothing if ev_title already has the image.
            if image['name'] not in ev_title.images_ap:
                ev_title.images.append(
                    self._ev_title_image_cls(name=image['name'],
                                             original_url=image['original_url'],
                                             image_type=image_type))

    session.add(ev_title)
    session.commit()
    return ev_title
def _to_sqlalchemy(self, item):
    """Store ``item`` as a site title linked to its ev_title.

    The site title is looked up through its ev_title's ``original_id`` and
    created from ``item.to_title()`` when missing. ``_set_relationships`` and
    ``_get_ev_title`` are then applied, after which title images, categories
    and actresses are attached and the module-level ``session`` is committed
    (once per newly created actress and once at the end).

    Args:
        item: scraped item read through the keys ``original_id``,
            ``categories``, ``actresses`` and ``images`` and the method
            ``to_title()``. ``images`` is iterated as a mapping of image type
            to image names; its ``'face'`` entry is read when actresses are
            present.

    Raises:
        NoResultFound: an ev_title_image or ev_actress expected to exist
            already is missing.
    """
    title = session.query(self._title_cls).\
        join(self._ev_title_cls).\
        filter(self._ev_title_cls.original_id == item['original_id']).\
        first()
    if not title:
        title = self._title_cls(**item.to_title())
    title = self._set_relationships(item, title)

    ev_title = self._get_ev_title(item)
    title.ev_title = ev_title

    # title images
    # NOTE(review): the collection is rebuilt and replaced on every call,
    # also for a title that is already stored -- confirm this is intended.
    title_images = []
    for image_type, names in item['images'].iteritems():
        for name in names:
            ev_title_image = session.query(self._ev_title_image_cls).\
                filter_by(name=name).\
                one()
            title_image = self._title_image_cls()
            title_image.ev_title_image = ev_title_image
            title_images.append(title_image)
    title.images = title_images

    # categories -- unknown names are skipped, attached ones are not repeated
    for c in item['categories']:
        category = session.query(self._category_cls).filter_by(name=c).first()
        if category and category not in title.categories:
            title.categories.append(category)

    # actresses
    if item['actresses']:
        face_names = item['images']['face']
        for a in item['actresses']:
            # The ev_actress is expected to exist because _get_ev_title()
            # inserts it when missing.
            ev_actress = session.query(self._ev_actress_cls).\
                filter_by(name=a).\
                one()
            actress = session.query(self._actress_cls).\
                join(self._ev_actress_cls).\
                filter(self._ev_actress_cls.name == a).\
                first()

            if not actress:
                actress = self._actress_cls()

                # ``face_names`` holds several names, so it is matched with
                # IN rather than compared to the column as a single value.
                ev_actress_image = None
                if face_names:
                    ev_actress_image = session.query(self._ev_actress_image_cls).\
                        filter(self._ev_actress_image_cls.name.in_(face_names)).\
                        filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                        first()

                if ev_actress_image is not None:
                    # Reuse the site-side image row when there is one.
                    actress_image = session.query(self._actress_image_cls).\
                        filter_by(ev_actress_image_id=ev_actress_image.ev_actress_image_id).\
                        first()
                    if not actress_image:
                        actress_image = self._actress_image_cls()
                        actress_image.ev_actress_image = ev_actress_image
                    actress.images.append(actress_image)

                actress.ev_actress = ev_actress
                session.add(actress)
                session.commit()

            # Avoid a second link when the title is processed again.
            if actress not in title.actresses:
                title.actresses.append(actress)

    session.add(title)
    session.commit()
def _get_ev_title(self, item):
    """Fetch or create the ev_title for ``item`` and attach its relations.

    The ev_title is looked up by ``item['original_id']`` together with
    ``item['original_url']``; when there is none, a new one is built from
    ``item.to_ev_title()``. Categories, actresses, actress face images and
    title images named by ``item`` are attached when missing, then the
    module-level ``session`` is committed.

    Category names are matched in three steps: exact name, the same name in
    the other kana script, then a substring match over all three spellings.
    Names that resolve to nothing are skipped.

    Args:
        item: scraped item read through the keys ``original_id``,
            ``original_url``, ``categories``, ``actresses`` and ``images``
            and the method ``to_ev_title()``. Every image is read as a
            mapping with ``'name'`` and ``'original_url'`` keys; ``images``
            must contain a ``'face'`` entry when actresses are present.

    Returns:
        The ev_title object that was added to the session and committed.
    """
    try:
        # original_id may be the same on another site, so the URL is used as
        # a second filter.
        ev_title = session.query(self._ev_title_cls).\
            filter_by(original_id=item['original_id']).\
            filter_by(original_url=item['original_url']).\
            one()
    except NoResultFound:
        ev_title = self._ev_title_cls(**(item.to_ev_title()))

    # categories
    if item['categories']:
        for category_name in item['categories']:
            katakana = cnvk.convert(category_name, cnvk.HIRA2KATA)
            hiragana = cnvk.convert(category_name, cnvk.KATA2HIRA)

            # Do nothing if ev_title already has the category under any of
            # its kana spellings. (Chaining the three ``not in`` tests with
            # ``or`` was true whenever a single spelling was missing, so
            # attached categories were appended again.)
            attached = ev_title.categories_ap
            if (category_name in attached or katakana in attached
                    or hiragana in attached):
                continue

            try:
                ev_category = session.query(self._ev_category_cls).\
                    filter_by(name=category_name).\
                    one()
            except NoResultFound:
                ev_category = None
                # A one-character name without an exact match is treated as
                # absent from the database; no fuzzy lookup is attempted.
                if len(category_name) != 1:
                    # The same word may be stored in the other kana script.
                    # The comparison must be against the category name
                    # column, not against the actress class.
                    ev_category = session.query(self._ev_category_cls).\
                        filter((self._ev_category_cls.name == katakana) |
                               (self._ev_category_cls.name == hiragana)).\
                        first()
                    if not ev_category:
                        ev_category = session.query(self._ev_category_cls).\
                            filter((self._ev_category_cls.name.contains(category_name)) |
                                   (self._ev_category_cls.name.contains(katakana)) |
                                   (self._ev_category_cls.name.contains(hiragana))).\
                            order_by(self._ev_category_cls.ev_category_id).\
                            first()

            # The substring match can resolve to a category that is already
            # attached under another name, hence the membership test.
            if ev_category and ev_category not in ev_title.categories:
                ev_title.categories.append(ev_category)

    # actresses
    if item['actresses']:
        for actress_name in item['actresses']:
            # ev_actress and its images are handled before checking whether
            # the title already has the actress, because an existing
            # ev_actress may not have these face images yet.
            try:
                ev_actress = session.query(self._ev_actress_cls).\
                    filter_by(name=actress_name).\
                    one()
            except NoResultFound:
                ev_actress = self._ev_actress_cls(name=actress_name)

            # Face images are added every time a title comes in, skipping
            # names the actress already has.
            # NOTE(review): the original comment says ev_actress.images is
            # ordered newest first; that ordering is not visible here.
            if item['images']['face']:
                for image in item['images']['face']:
                    if image['name'] not in ev_actress.images_ap:
                        ev_actress.images.append(
                            self._ev_actress_image_cls(
                                name=image['name'],
                                original_url=image['original_url']))

            # Needed: once added, the actress is inserted or updated by the
            # next commit() even when the ``if`` below is false.
            session.add(ev_actress)
            if actress_name not in ev_title.actresses_ap:
                ev_title.actresses.append(ev_actress)

    # title images
    for image_type, images in item['images'].iteritems():
        if not images:
            continue
        for image in images:
            # Do nothing if ev_title already has the image.
            if image['name'] not in ev_title.images_ap:
                ev_title.images.append(
                    self._ev_title_image_cls(name=image['name'],
                                             original_url=image['original_url'],
                                             image_type=image_type))

    session.add(ev_title)
    session.commit()
    return ev_title
def _to_sqlalchemy(self, item):
    """Store ``item`` as a site title linked to its ev_title.

    The site title is looked up through its ev_title's ``original_id`` and
    created from ``item.to_title()`` when missing. ``_set_relationships`` and
    ``_get_ev_title`` are then applied, after which site-side categories,
    actresses, actress face images and title images are attached and the
    module-level ``session`` is committed.

    Args:
        item: scraped item read through the keys ``original_id``,
            ``categories``, ``actresses`` and ``images`` and the method
            ``to_title()``. Every image is read as a mapping with a
            ``'name'`` key; ``images`` must contain a ``'face'`` entry when
            actresses are present.

    Raises:
        NoResultFound: a row expected to exist already (ev_actress,
            ev_actress_image or ev_title_image) is missing.
        Exception: whatever assigning ``title.ev_title`` raises is re-raised
            unchanged after debug output has been printed.
    """
    try:
        title = session.query(self._title_cls).\
            join(self._ev_title_cls).\
            filter(self._ev_title_cls.original_id == item['original_id']).\
            one()
    except NoResultFound:
        title = self._title_cls(**item.to_title())
    title = self._set_relationships(item, title)

    ev_title = self._get_ev_title(item)
    try:
        title.ev_title = ev_title
    except Exception:
        # Debug aid: show which title failed, then re-raise with a bare
        # ``raise`` so the original traceback is kept.
        print('### debug ###')
        print(title.site_id)
        print(item['original_id'])
        print(self._title_cls)
        print('### debug ###')
        raise

    # categories
    if item['categories']:
        for c in item['categories']:
            # Do nothing if title already has the category.
            if c in title.categories_ap:
                continue
            category = session.query(self._category_cls).\
                filter_by(name=c).\
                first()
            if not category:
                # The same word may be stored in the other kana script.
                category = session.query(self._category_cls).\
                    filter((self._category_cls.name == cnvk.convert(c, cnvk.HIRA2KATA)) |
                           (self._category_cls.name == cnvk.convert(c, cnvk.KATA2HIRA))).\
                    first()
            # Unknown categories are skipped silently.
            if category:
                title.categories.append(category)

    # actresses
    if item['actresses']:
        for a in item['actresses']:
            # The ev_actress (and its images) is expected to exist because
            # _get_ev_title() inserts it when missing.
            ev_actress = session.query(self._ev_actress_cls).\
                filter_by(name=a).\
                one()

            # The actress and its images are handled before checking whether
            # the title already has the actress, because an existing actress
            # may not have these face images yet.
            try:
                actress = session.query(self._actress_cls).\
                    join(self._ev_actress_cls).\
                    filter(self._ev_actress_cls.name == a).\
                    one()
            except NoResultFound:
                actress = self._actress_cls()
                actress.ev_actress = ev_actress

            if item['images']['face']:
                for image in item['images']['face']:
                    if image['name'] in actress.images_ap:
                        continue
                    # Exactly one row is expected: _get_ev_title() creates it.
                    ev_actress_image = session.query(self._ev_actress_image_cls).\
                        filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                        filter_by(name=image['name']).\
                        one()
                    actress_image = self._actress_image_cls()
                    actress_image.ev_actress_image = ev_actress_image
                    actress.images.append(actress_image)

            # Needed so the actress object itself gets committed.
            session.add(actress)
            if a not in title.actresses_ap:
                title.actresses.append(actress)

    # title images
    for image_type, images in item['images'].iteritems():
        if not images:
            continue
        for image in images:
            if image['name'] in title.images_ap:
                continue
            ev_title_image = session.query(self._ev_title_image_cls).\
                filter_by(ev_title_id=ev_title.ev_title_id).\
                filter_by(name=image['name']).\
                filter_by(image_type=image_type).\
                one()
            title_image = self._title_image_cls()
            title_image.ev_title_image = ev_title_image
            title.images.append(title_image)

    session.add(title)
    session.commit()