def _to_sqlalchemy(self, item):
    """Store every category listed in *item* that is not stored yet.

    *item* maps a category type's ``name_en`` to a list of values. A value
    is either a ``unicode`` category name or an object that supports
    ``value['name']`` and ``value.to_category()``.

    A category counts as stored when a ``self._category_cls`` row with the
    same name and ``category_type_id`` exists. Each new category is linked
    to its category type and to the EV category returned by
    ``self._get_ev_category`` and is committed right away.

    ``Query.one()`` raises when ``name_en`` does not match exactly one
    category type.
    """
    for type_name, values in item.iteritems():
        category_type = session.query(self._category_type_cls).\
            filter_by(name_en=type_name).\
            one()
        ev_category_type_id = category_type.ev_category_type_id

        for value in values:
            is_plain_name = isinstance(value, unicode)
            name = value if is_plain_name else value['name']

            stored = session.query(self._category_cls).\
                filter_by(name=name).\
                filter_by(category_type_id=category_type.category_type_id).\
                first()
            if stored:
                continue

            if is_plain_name:
                category = self._category_cls(value)
            else:
                category = self._category_cls(**value.to_category())
            ev_category = self._get_ev_category(session, name,
                                                ev_category_type_id)

            category.category_type = category_type
            category.ev_category = ev_category
            session.add(category)
            session.commit()
def _to_sqlalchemy(self, item):
    """Store every category name listed in *item* that is not stored yet.

    *item* maps a category type's ``name_en`` to a list of category names.
    A name is skipped when a ``self._category_cls`` row with that name and
    the type's ``category_type_id`` already exists. Otherwise a new
    category is created, linked to the category type and to the EV category
    returned by ``self._get_ev_category``, then added and committed.

    ``Query.one()`` raises when ``name_en`` does not match exactly one
    category type.
    """
    for type_name, names in item.iteritems():
        category_type = session.query(self._category_type_cls).\
            filter_by(name_en=type_name).\
            one()
        ev_category_type_id = category_type.ev_category_type_id

        for name in names:
            stored = session.query(self._category_cls).\
                filter_by(name=name).\
                filter_by(category_type_id=category_type.category_type_id).\
                first()
            if stored:
                continue

            category = self._category_cls(name)
            category.category_type = category_type
            category.ev_category = self._get_ev_category(
                session, name, ev_category_type_id)
            session.add(category)
            session.commit()
def _to_sqlalchemy(self, item):
    """Link the category at ``item['category_url']`` to the matching titles.

    The category is the single ``self._ippondo_category`` row whose
    ``original_url`` equals ``item['category_url']``. For every entry of
    ``item['title_description']`` the title whose EV description starts
    with that text is looked up:

    * no match: the entry is skipped;
    * several matches: ``MultipleResultsFound`` is raised, carrying the
      UTF-8 encoded description;
    * one match: the category and its EV category are appended to the
      title and the EV title unless their names are already listed in the
      respective ``categories_ap``, then the title is added and committed.
    """
    category = session.query(self._ippondo_category).\
        filter_by(original_url=item['category_url']).\
        one()

    for description in item['title_description']:
        starts_with = '%s%%' % description
        try:
            title = session.query(self._ippondo_title).\
                join(self._ev_title_cls).\
                filter(self._ev_title_cls.description.like(starts_with)).\
                one()
        except NoResultFound:
            # Nothing stored for this description; move on to the next one.
            continue
        except MultipleResultsFound:
            raise MultipleResultsFound(description.encode('utf-8'))

        if category.ev_category.name not in title.ev_title.categories_ap:
            title.ev_title.categories.append(category.ev_category)
        if category.name not in title.categories_ap:
            title.categories.append(category)
        session.add(title)
        session.commit()
def _to_sqlalchemy(self, item):
    """Store a site actress, with its image, for each merged entry of *item*.

    For every entry yielded by ``item.merge_items()`` the EV actress is
    obtained from ``self._get_ev_actress``. The site actress pointing at
    that EV actress is reused when exactly one exists; otherwise a new one
    is created. When ``entry['image']`` is not listed in
    ``actress.images_ap`` the EV image with that name and EV actress id is
    fetched with ``Query.one()`` and wrapped in a new site image. The
    actress is then added and committed.
    """
    for entry in item.merge_items():
        ev_actress = self._get_ev_actress(entry)

        try:
            actress = session.query(self._actress_cls).\
                filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                one()
        except NoResultFound:
            actress = self._actress_cls()
            actress.ev_actress = ev_actress

        image_name = entry['image']
        if image_name not in actress.images_ap:
            ev_image = session.query(self._ev_actress_image_cls).\
                filter_by(name=image_name).\
                filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                one()
            site_image = self._actress_image_cls()
            site_image.ev_actress_image = ev_image
            # An actress name can be duplicated on the site, in which case
            # there are two or more images for that name. When different
            # actresses share one name their face images differ, so the
            # image is added.
            actress.images.append(site_image)

        session.add(actress)
        session.commit()
def _get_ev_actress(self, item):
    """Return the EV actress named ``item['name']``, building one if missing.

    Existing actress: every attribute that is currently falsy is filled
    from the field of the same name in *item* (``name`` and ``image`` are
    skipped), and a new image named ``item['image']`` is attached when no
    image with that name is stored yet.

    Unknown actress: a new instance is built from all fields of *item*
    except ``image`` and receives a new image named ``item['image']``.

    *item* itself is never modified, so a caller can keep reading
    ``item['image']`` afterwards. This method calls neither
    ``session.add`` nor ``session.commit``.

    Raises ``KeyError`` when *item* lacks ``name`` or ``image``.
    """
    image_name = item['image']
    ev_actress = session.query(self._ev_actress_cls).\
        filter_by(name=item['name']).\
        first()

    # Work on a copy so the caller's item keeps its 'image' (and 'name').
    fields = item.copy()
    del fields['image']

    if ev_actress:
        del fields['name']
        # Only the remaining fields are model attributes to fill in;
        # 'name' and 'image' must not be passed to getattr/setattr.
        for key, value in fields.iteritems():
            if not getattr(ev_actress, key):
                setattr(ev_actress, key, value)

        ev_actress_image = session.query(self._ev_actress_image_cls).\
            filter_by(name=image_name).\
            first()
        if not ev_actress_image:
            ev_actress_image = self._ev_actress_image_cls(image_name)
            ev_actress.images.append(ev_actress_image)
    else:
        ev_actress_image = self._ev_actress_image_cls(image_name)
        ev_actress = self._ev_actress_cls(**fields)
        ev_actress.images.append(ev_actress_image)

    return ev_actress
def _get_ev_category(self, category_name, ev_category_type_id):
    """Return the EV category for *category_name*, building one if missing.

    Lookup rules:

    * a one-character name only matches a category with exactly that name,
      because a substring match on a single character is too loose;
    * any other name matches the first category whose name contains it.

    When nothing matches, a new ``self._ev_category_cls`` instance is
    created. In every case the ``EVCategoryType`` row whose
    ``category_type_id`` equals *ev_category_type_id* is appended to the
    category's ``category_types``.

    This method calls neither ``session.add`` nor ``session.commit``.
    ``Query.one()`` raises when the category type id does not match exactly
    one row.
    """
    ev_category_cls = self._ev_category_cls

    if len(category_name) == 1:
        ev_category = session.query(ev_category_cls).\
            filter_by(name=category_name).\
            first()
    else:
        ev_category = session.query(ev_category_cls).\
            filter(ev_category_cls.name.contains(category_name)).\
            first()

    if not ev_category:
        ev_category = ev_category_cls(category_name)

    ev_category_type = session.query(EVCategoryType).\
        filter_by(category_type_id=ev_category_type_id).\
        one()
    ev_category.category_types.append(ev_category_type)
    return ev_category
def _get_ev_category(self, session, category_name, ev_category_type_id):
    """Return the EV category for *category_name*, building one if missing.

    An exact name match is tried first with ``Query.one()``. When there is
    none, names longer or shorter than one character fall back to the
    category with the lowest ``ev_category_id`` whose name contains
    *category_name*; one-character names get no such fallback. When nothing
    is found a new ``self._ev_category_cls`` instance is created.

    The ``EVCategoryType`` row whose ``category_type_id`` equals
    *ev_category_type_id* is always appended to ``category_types``.
    Nothing is added to the session or committed here.
    """
    ev_category_cls = self._ev_category_cls
    exact_match = session.query(ev_category_cls).filter_by(name=category_name)

    try:
        ev_category = exact_match.one()
    except NoResultFound:
        ev_category = None
        # A single character is too short for a meaningful substring match.
        if len(category_name) != 1:
            ev_category = session.query(ev_category_cls).\
                filter(ev_category_cls.name.contains(category_name)).\
                order_by(ev_category_cls.ev_category_id).\
                first()

    if not ev_category:
        ev_category = ev_category_cls(category_name)

    ev_category_type = session.query(EVCategoryType).\
        filter_by(category_type_id=ev_category_type_id).\
        one()
    ev_category.category_types.append(ev_category_type)
    return ev_category
def _get_ev_title(self, item):
    """Return the EV title for ``item['original_id']``, creating it if missing.

    An existing title is returned untouched. Otherwise a new title is built
    from ``self._to_insert(item)`` and linked to:

    * the EV category of every site category named in
      ``item['categories']`` (names without exactly one match are logged
      and skipped);
    * every EV actress named in ``item['actresses']`` (same skip rule);
    * one EV image per name in ``item['images']``, which maps an image type
      to a list of names; stored images are reused, missing ones are
      created with that image type.

    The new title is added to the session and committed.
    """
    ev_title = session.query(self._ev_title_cls).\
        filter_by(original_id=item['original_id']).\
        first()
    if ev_title:
        return ev_title

    ev_categories = []
    for category_name in item['categories']:
        try:
            site_category = session.query(self._category_cls).\
                filter_by(name=category_name).\
                one()
        except NoResultFound as e:
            log.msg(e)
        else:
            ev_categories.append(site_category.ev_category)

    ev_actresses = []
    for actress_name in item['actresses']:
        try:
            ev_actress = session.query(self._ev_actress_cls).\
                filter_by(name=actress_name).\
                one()
        except NoResultFound as e:
            log.msg(e)
        else:
            ev_actresses.append(ev_actress)

    # One flat list across all image types. The earlier code reused a
    # single name for both a dict and the per-type list, so it failed with
    # a TypeError as soon as an image type was stored.
    # NOTE(review): assumes ev_title.images is a list-like relationship,
    # confirm against the model.
    ev_title_images = []
    for image_type, names in item['images'].iteritems():
        for name in names:
            ev_title_image = session.query(self._ev_title_image_cls).\
                filter_by(name=name).\
                first()
            if not ev_title_image:
                ev_title_image = self._ev_title_image_cls(
                    name=name, image_type=image_type)
            ev_title_images.append(ev_title_image)

    ev_title = self._ev_title_cls(**(self._to_insert(item)))
    ev_title.categories = ev_categories
    ev_title.actresses = ev_actresses
    ev_title.images = ev_title_images
    session.add(ev_title)
    session.commit()
    return ev_title
def _get_ev_actress(self, item):
    """Return the stored EV actress named ``item['name']``, creating it if missing.

    * Exactly one match: the matching rows are updated with
      ``item.to_update()`` through ``Query.update``.
    * No match: a new actress is built from ``item.to_insert()``.

    In both cases an image built from ``item['image']['name']`` and
    ``item['image']['original_url']`` is appended unless that name is
    already listed in ``ev_actress.images_ap``; this is why the creation
    branch does not attach the image itself. The actress is then added to
    the session and committed.
    """
    matching = session.query(self._ev_actress_cls).filter_by(name=item['name'])

    try:
        ev_actress = matching.one()
    except NoResultFound:
        ev_actress = self._ev_actress_cls(**(item.to_insert()))
    else:
        matching.update(item.to_update())

    image = item['image']
    if image['name'] not in ev_actress.images_ap:
        new_image = self._ev_actress_image_cls(
            name=image['name'],
            original_url=image['original_url'])
        ev_actress.images.append(new_image)

    session.add(ev_actress)
    session.commit()
    return ev_actress
def execute(self):
    """Serve one page of titles as JSON, newest release first.

    Request arguments:
        limit: page size, defaults to 15.
        page: zero-based page index, defaults to 0.

    Arguments that are not integers, or that are negative, are passed to
    ``self.handle_request_exception`` and its result is returned.

    The rendered page is read from ``self.emc`` when cached. On a miss the
    titles are queried, rendered with ``self.to_json`` and cached. If
    rendering raises ``RuntimeError`` an empty page is rendered instead and
    is not cached.

    Returns:
        An iterator over a single element, the response body.
    """
    try:
        limit = int(self.get_argument('limit', default=15))
        page = int(self.get_argument('page', default=0))
        if limit < 0 or page < 0:
            # Negative values would turn into a negative offset or an
            # inverted slice below; treat them like any other bad argument.
            raise ValueError('limit and page must not be negative')
    except Exception as e:
        return self.handle_request_exception(e)

    cache_key = self.emc.get_key_format() % (page, limit)
    data = self.emc.get(cache_key)
    if not data:
        offset = page * limit
        titles = session.query(EVJPTitle).\
            order_by(desc(EVJPTitle.released_date)).\
            order_by(desc(EVJPTitle.ev_title_id)).\
            slice(offset, offset + limit).\
            all()
        try:
            data = self.to_json(titles, limit, page)
        except RuntimeError:
            data = self.to_json([], limit, page)
        else:
            self.emc.set(cache_key, data)

    self.start_response(self.get_status(), self.get_response_headers())
    return iter([data])
def _set_relationships(self, item, title): if item['actress_type']: for at in item['actress_type']: if not at in title.actress_type_ap: actress_type = session.query(self._category_cls).filter_by(name=at).first() if actress_type: title.actress_type.append(actress_type) if item['play']: for p in item['play']: if not p in title.play_ap: play = session.query(self._category_cls).filter_by(name=p).first() if play: title.play.append(play) return title
def _to_sqlalchemy(self, item):
    """Insert *item* as a new title unless its key value is already stored.

    The lookup filters ``self._new_title_cls`` on the column named by
    ``self._key`` using ``item[self._key]``. When no row matches, a new
    instance is built from all fields of *item*, added and committed.
    """
    key_filter = {self._key: item[self._key]}
    already_stored = session.query(self._new_title_cls).\
        filter_by(**key_filter).\
        first()
    if already_stored:
        return

    session.add(self._new_title_cls(**item))
    session.commit()
def _to_sqlalchemy(self, item):
    """Store a site title for *item* unless one with its original id exists.

    A title counts as stored when a ``self._title_cls`` row joined to
    ``self._ev_title_cls`` has ``original_id == item['original_id']``.
    Otherwise a new title is built from ``self._to_insert(item)``, passed
    through ``self._set_relationships`` and linked to:

    * the EV title from ``self._get_ev_title``;
    * one site image per name in ``item['images']`` (each name must match
      exactly one stored EV image, ``Query.one()`` raises otherwise);
    * the stored categories and actresses named in ``item['categories']``
      and ``item['actresses']`` (unknown names are skipped).

    The new title is added to the session and committed.
    """
    ev_title_cls = self._ev_title_cls
    stored = session.query(self._title_cls).\
        join(ev_title_cls).\
        filter(ev_title_cls.original_id == item['original_id']).\
        first()
    if stored:
        return

    title = self._title_cls(self._to_insert(item))
    title = self._set_relationships(item, title)
    title.ev_title = self._get_ev_title(item)

    for _image_type, image_names in item['images'].iteritems():
        for image_name in image_names:
            ev_title_image = session.query(self._ev_title_image_cls).\
                filter_by(name=image_name).\
                one()
            title_image = self._title_image_cls()
            title_image.ev_title_image = ev_title_image
            title.images.append(title_image)

    for category_name in item['categories']:
        category = session.query(self._category_cls).\
            filter_by(name=category_name).\
            first()
        if category:
            title.categories.append(category)

    for actress_name in item['actresses']:
        actress = session.query(self._actress_cls).\
            filter_by(name=actress_name).\
            first()
        if actress:
            title.actresses.append(actress)

    session.add(title)
    session.commit()
def execute(self):
    """Serve one page of titles from the requested sites as JSON.

    ``self.kwargs['site_ids']`` holds integer site ids separated by ``+``
    or a space. Request arguments:
        limit: page size, defaults to 15.
        page: zero-based page index, defaults to 0.

    Arguments or site ids that are not integers, and a negative ``limit``
    or ``page``, are passed to ``self.handle_request_exception`` and its
    result is returned.

    The rendered page is read from ``self.emc`` when cached. On a miss the
    titles of those sites are queried, newest release first, rendered with
    ``self.to_json`` and cached. If rendering raises ``RuntimeError`` an
    empty page is rendered instead and is not cached.

    Returns:
        An iterator over a single element, the response body.
    """
    try:
        limit = int(self.get_argument('limit', default=15))
        page = int(self.get_argument('page', default=0))
        if limit < 0 or page < 0:
            # Negative values would turn into a negative offset or an
            # inverted slice below; treat them like any other bad argument.
            raise ValueError('limit and page must not be negative')
        # Parsed here so a malformed id is reported like a bad argument
        # instead of escaping as an unhandled error.
        site_ids = [int(site_id)
                    for site_id in re.split(r' |\+', self.kwargs['site_ids'])]
    except Exception as e:
        return self.handle_request_exception(e)

    cache_key = self.emc.get_key_format() % (
        str(self.kwargs['site_ids']), page, limit)
    data = self.emc.get(cache_key)
    if not data:
        offset = page * limit
        title_list = session.query(EVJPTitle).\
            join(SiteJPTitle).\
            filter(SiteJPTitle.site_id.in_(site_ids)).\
            order_by(desc(EVJPTitle.released_date),
                     desc(EVJPTitle.ev_title_id)).\
            slice(offset, offset + limit).\
            all()
        try:
            data = self.to_json(title_list, limit, page)
        except RuntimeError:
            data = self.to_json([], limit, page)
        else:
            self.emc.set(cache_key, data)

    self.start_response(self.get_status(), self.get_response_headers())
    return iter([data])
def _get_ev_actress(self, item):
    """Return the EV actress named ``item['actress_name']``, building one if missing.

    Existing actress: a new image named ``item['image_name']`` is attached
    when no image with that name is stored yet.

    Unknown actress: ``item['actress_name_kana']`` is set to ``None`` on
    *item* when absent, then a new actress is built from the name, kana and
    ``item['actress_name_index']`` and receives a new image.

    This method calls neither ``session.add`` nor ``session.commit``.
    """
    image_cls = self._ev_actress_image_cls
    ev_actress = session.query(self._ev_actress_cls).\
        filter_by(name=item['actress_name']).\
        first()

    if not ev_actress:
        if 'actress_name_kana' not in item:
            item['actress_name_kana'] = None
        ev_actress = self._ev_actress_cls(item['actress_name'],
                                          item['actress_name_kana'],
                                          item['actress_name_index'])
        new_image = image_cls(item['image_name'])
        ev_actress.ev_jp_actress_images.append(new_image)
        return ev_actress

    stored_image = session.query(image_cls).\
        filter_by(name=item['image_name']).\
        first()
    if not stored_image:
        new_image = image_cls(item['image_name'])
        ev_actress.ev_jp_actress_images.append(new_image)
    return ev_actress
def _get_media_info(self):
    """Return the title images whose EV image was created less than a week ago.

    The query joins ``self._title_image_cls`` to
    ``self._ev_title_image_cls`` and keeps rows whose ``created`` is later
    than the current time minus one week.
    """
    ev_image_cls = self._ev_title_image_cls
    recent_images = session.query(self._title_image_cls).\
        join(ev_image_cls).\
        filter(ev_image_cls.created > datetime.today() - timedelta(weeks=1))
    return recent_images.all()
def parse(self, response):
    """Return an item listing the URLs of recently created actress images.

    *response* is not used. The item's ``image_urls`` holds the
    ``original_url`` of every ``self._actress_image_cls`` row whose joined
    EV image was created less than a week ago.
    """
    item = OnacleActressImageItem()
    ev_image_cls = self._ev_actress_image_cls
    recent_images = session.query(self._actress_image_cls).\
        join(ev_image_cls).\
        filter(ev_image_cls.created > datetime.today() - timedelta(weeks=1)).\
        all()

    image_urls = []
    for image in recent_images:
        image_urls.append(image.original_url)
    item['image_urls'] = image_urls
    return item
def _set_relationships(self, item, title): if item['series']: for s in item['series']: if not s in title.series_ap: series = session.query(self._category_cls).filter_by(name=s).first() if series: title.series.append(series) return title
def _to_sqlalchemy(self, item):
    """Insert *item* as a new feed entry unless its key value is already stored.

    The lookup filters ``self._xml_feed_item_cls`` on the column named by
    ``self._key`` using ``item[self._key]``. When no row matches, a new
    instance is built from all fields of *item*, added and committed.
    """
    key_filter = {self._key: item[self._key]}
    already_stored = session.query(self._xml_feed_item_cls).\
        filter_by(**key_filter).\
        first()
    if already_stored:
        return

    session.add(self._xml_feed_item_cls(**item))
    session.commit()
def _get_ev_actress(self, item):
    """Return the stored EV actress named ``item['name']``, creating it if missing.

    Existing actress: the matching rows are updated with
    ``self._to_update(item)`` through ``Query.update``, and a new image
    named ``item['image']`` is attached when that name is truthy and no
    image with that name is stored yet.

    Unknown actress: a new actress is built from ``item.to_insert()`` and
    receives a new image named ``item['image']``.

    In both cases the actress is added to the session and committed.
    """
    ev_actress_query = session.query(self._ev_actress_cls).\
        filter_by(name=item['name'])
    ev_actress = ev_actress_query.first()

    if ev_actress:
        ev_actress_query.update(self._to_update(item))
        ev_actress_image = session.query(self._ev_actress_image_cls).\
            filter_by(name=item['image']).\
            first()
        if not ev_actress_image and item['image']:
            ev_actress_image = self._ev_actress_image_cls(item['image'])
            ev_actress.images.append(ev_actress_image)
    else:
        ev_actress_image = self._ev_actress_image_cls(item['image'])
        ev_actress = self._ev_actress_cls(**(item.to_insert()))
        ev_actress.images.append(ev_actress_image)

    session.add(ev_actress)
    session.commit()
    return ev_actress
def parse(self, response):
    """Return an item listing the URLs of recently created actress images.

    *response* is not used. The item's ``image_urls`` holds the
    ``original_url`` of every ``self._actress_image_cls`` row whose joined
    EV image was created less than a week ago.
    """
    item = IppondoActressImageItem()
    ev_image_cls = self._ev_actress_image_cls
    recent_query = session.query(self._actress_image_cls).join(ev_image_cls)
    recent_query = recent_query.filter(
        ev_image_cls.created > datetime.today() - timedelta(weeks=1)
    )

    image_urls = []
    for image in recent_query.all():
        image_urls.append(image.original_url)
    item["image_urls"] = image_urls
    return item
def _get_ev_category(self, category_name, ev_category_type_id):
    """Return the EV category matching *category_name*, creating it if missing.

    Lookup order:

    1. the first category whose name contains *category_name*;
    2. the first category, among all stored ones, whose name is contained
       in *category_name*.

    A category found this way is returned as is. Otherwise a new category
    is created, given the ``EVCategoryType`` row whose ``category_type_id``
    equals *ev_category_type_id*, added to the session and committed.
    """
    ev_category_cls = self._ev_category_cls

    containing = session.query(ev_category_cls).\
        filter(ev_category_cls.name.contains(category_name)).\
        first()
    if containing:
        return containing

    for candidate in session.query(ev_category_cls).all():
        if candidate.name in category_name:
            return candidate

    ev_category_type = session.query(EVCategoryType).\
        filter_by(category_type_id=ev_category_type_id).\
        one()
    created = ev_category_cls(category_name)
    created.category_types.append(ev_category_type)
    session.add(created)
    session.commit()
    return created
def main():
    """Flag as scraped every new-title row created less than a week ago.

    Runs over each class in ``new_title_clsses``; every matching row gets
    ``scraped = True`` and is added to the session and committed.
    """
    for new_title_cls in new_title_clsses:
        recent_titles = session.query(new_title_cls).filter(
            new_title_cls.created > datetime.today() - timedelta(weeks=1))
        for recent_title in recent_titles.all():
            recent_title.scraped = True
            session.add(recent_title)
            session.commit()
def _to_sqlalchemy(self, item):
    """Store the categories listed in *item*, filling in missing URLs.

    *item* maps a category type's ``name_en`` to a list of values that
    provide ``value['name']`` and ``value['original_url']``.

    * Unknown category (no row with that name and ``category_type_id``):
      a new one is created with the name and URL and linked to the category
      type and to the EV category from ``self._get_ev_category``.
    * Known category without an ``original_url``: the URL is filled in.

    Each category is added to the session and committed.
    """
    for type_name, values in item.iteritems():
        category_type = session.query(self._category_type_cls).\
            filter_by(name_en=type_name).\
            one()
        ev_category_type_id = category_type.ev_category_type_id

        for value in values:
            category = session.query(self._category_cls).\
                filter_by(name=value['name']).\
                filter_by(category_type_id=category_type.category_type_id).\
                first()

            if category:
                if not category.original_url:
                    category.original_url = value['original_url']
            else:
                category = self._category_cls(value['name'],
                                              value['original_url'])
                category.category_type = category_type
                category.ev_category = self._get_ev_category(
                    session, value['name'], ev_category_type_id)

            session.add(category)
            session.commit()
def _get_ev_category(self, category_name, ev_category_type_id):
    """Return the EV category matching *category_name*, creating it if missing.

    Lookup order:

    1. unless *category_name* is a single character, the first category
       whose name contains it (a one-character substring match is too
       loose);
    2. the first category, among all stored ones, whose name is contained
       in *category_name*;
    3. a newly created category.

    The ``EVCategoryType`` row whose ``category_type_id`` equals
    *ev_category_type_id* is appended to the category's ``category_types``;
    the category is then added to the session and committed.
    """
    ev_category_cls = self._ev_category_cls

    ev_category = None
    if len(category_name) != 1:
        ev_category = session.query(ev_category_cls).\
            filter(ev_category_cls.name.contains(category_name)).\
            first()

    if not ev_category:
        # A separate loop variable is required: reusing ev_category here
        # would leave it bound to the last row scanned even when no name
        # matches, and that unrelated category would be returned.
        for candidate in session.query(ev_category_cls).all():
            if candidate.name in category_name:
                ev_category = candidate
                break

    if not ev_category:
        ev_category = ev_category_cls(category_name)

    ev_category_type = session.query(EVCategoryType).\
        filter_by(category_type_id=ev_category_type_id).\
        one()
    ev_category.category_types.append(ev_category_type)
    session.add(ev_category)
    session.commit()
    return ev_category
def _to_sqlalchemy(self, item):
    """Store a title for *item* unless one with its original id exists.

    For a new title:

    * every name in ``item['categories']`` must match exactly one
      ``self._ev_category_cls`` row (``Query.one()`` raises otherwise);
    * every name in ``item['actresses']`` is looked up in
      ``self._ev_actress_cls``; a missing one is replaced by a new
      ``self._actress_cls`` instance built from the name.

    The title is built from all fields of *item*, linked to those
    actresses and categories, added and committed.
    """
    stored = session.query(self._title_cls).\
        filter_by(original_id=item['original_id']).\
        first()
    if stored:
        return

    categories = [
        session.query(self._ev_category_cls).filter_by(name=category_name).one()
        for category_name in item['categories']
    ]

    actresses = []
    for actress_name in item['actresses']:
        found = session.query(self._ev_actress_cls).\
            filter_by(name=actress_name).\
            first()
        actresses.append(found or self._actress_cls(actress_name))

    title = self._title_cls(**item)
    title.actresses = actresses
    title.categories = categories
    session.add(title)
    session.commit()
def _set_relationships(self, item, title): if item["series"]: # series_list = [] for s in item["series"]: if not s in title.series_ap: series = session.query(self._category_cls).filter_by(name=s).first() if series: # series_list.append(series) title.series.append(series) # title.series = series_list if item["actress_type"]: # actress_type_list = [] for at in item["actress_type"]: if not at in title.actress_type_ap: actress_type = session.query(self._category_cls).filter_by(name=at).first() if actress_type: # actress_type_list.append(actress_type) title.actress_type.append(actress_type) # title.actress_type = actress_type_list return title
def _get_ev_category(self, session, category_name, ev_category_type_id):
    """Return the EV category for *category_name*, building one if missing.

    A one-character name only matches a category with exactly that name;
    any other name matches the first category whose name contains it. When
    nothing matches, a new ``self._ev_category_cls`` instance is created.

    The ``EVCategoryType`` row whose ``category_type_id`` equals
    *ev_category_type_id* is always appended to ``category_types``.
    Nothing is added to the session or committed here.
    """
    ev_category_cls = self._ev_category_cls
    lookup = session.query(ev_category_cls)

    # A single character is too short for a meaningful substring match.
    if len(category_name) == 1:
        lookup = lookup.filter_by(name=category_name)
    else:
        lookup = lookup.filter(ev_category_cls.name.contains(category_name))

    ev_category = lookup.first() or ev_category_cls(category_name)

    ev_category_type = session.query(EVCategoryType).\
        filter_by(category_type_id=ev_category_type_id).\
        one()
    ev_category.category_types.append(ev_category_type)
    return ev_category
def _set_relationships(self, item, title): if item['figure']: for f in item['figure']: if not f in title.figure_ap: figure = session.query(self._category_cls).filter_by(name=f).first() if figure: title.figure.append(figure) if item['play']: for p in item['play']: if not p in title.play_ap: play = session.query(self._category_cls).filter_by(name=p).first() if play: title.play.append(play) if item['genre']: for g in item['genre']: if not g in title.genre_ap: genre = session.query(self._category_cls).filter_by(name=g).first() if genre: title.play.append(genre) return title
def _get_ev_category(self, session, category_name, ev_category_type_id):
    """Return the EV category for *category_name*, building one if missing.

    Resolution steps:

    1. the category whose name equals *category_name* (``Query.one()``);
    2. if there is none and the name is not a single character, the
       category with the lowest ``ev_category_id`` whose name contains
       *category_name*;
    3. a new ``self._ev_category_cls`` instance.

    The ``EVCategoryType`` row whose ``category_type_id`` equals
    *ev_category_type_id* is always appended to ``category_types``.
    Nothing is added to the session or committed here.
    """
    categories = session.query(self._ev_category_cls)

    try:
        found = categories.filter_by(name=category_name).one()
    except NoResultFound:
        if len(category_name) == 1:
            # A one-character name reaching this point is simply not in
            # the database; a substring match is not attempted for it.
            found = None
        else:
            found = session.query(self._ev_category_cls).\
                filter(self._ev_category_cls.name.contains(category_name)).\
                order_by(self._ev_category_cls.ev_category_id).\
                first()

    ev_category = found if found else self._ev_category_cls(category_name)

    ev_category_type = session.query(EVCategoryType).\
        filter_by(category_type_id=ev_category_type_id).\
        one()
    ev_category.category_types.append(ev_category_type)
    return ev_category