def __getitem__(self, key):
    """Return the list of scraped values for ``key``.

    Supported keys:

    * ``'name'`` -- every string extracted by the ``'name'`` XPath, stripped.
    * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- the index
      character (second character of the first ``'name_index'`` match)
      converted with ``cnvk`` and repeated once per entry of ``self['name']``.
      When the ``'name_index'`` XPath matches nothing, ``None`` is repeated
      instead.
    * ``'image'`` -- one ``{'name': ..., 'original_url': ...}`` dict per
      extracted image URL.

    Raises:
        KeyError: if ``key`` is none of the keys above.
    """
    if key == 'name':
        str_list = self._hxs.select(self._xpath_config[key]).extract()
        return [s.strip() for s in str_list]

    if key in ('name_index_hiragana', 'name_index_katakana'):
        str_list = self._hxs.select(self._xpath_config['name_index']).extract()
        if not str_list:
            # No index on the page: keep the result aligned with the names.
            return [None] * len(self['name'])
        # The index character is the second character of the first match.
        name_index = str_list[0][1]
        if key == 'name_index_hiragana':
            mode = cnvk.KATA2HIRA
        else:
            mode = cnvk.HIRA2KATA
        return [cnvk.convert(name_index, mode)] * len(self['name'])

    if key == 'image':
        image_urls = self._hxs.select(self._xpath_config[key]).extract()
        return [{'name': get_image_name(image_url), 'original_url': image_url}
                for image_url in image_urls]

    raise KeyError(key)
def __getitem__(self, key):
    """Return the list of scraped values for ``key``.

    Supported keys:

    * ``'name'`` -- every string extracted by the ``'name'`` XPath, stripped.
    * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- the single
      character found between corner brackets in the ``'name_index'`` nodes,
      converted with ``cnvk`` and repeated once per extracted name.
    * ``'image'`` -- one ``{'name': ..., 'original_url': ...}`` dict per
      extracted image path, each path prefixed with the site root.

    Raises:
        KeyError: if ``key`` is none of the keys above.
        ValueError: if an index key is requested and no bracketed index
            character is found.
    """
    if key == 'name':
        str_list = self._hxs.select(self._xpath_config[key]).extract()
        return [s.strip() for s in str_list]

    if key in ('name_index_hiragana', 'name_index_katakana'):
        # The pattern has no backslashes, so a plain unicode literal is
        # identical to the former ``ur''`` literal and parses on Python 3 too.
        str_list = self._hxs.select(self._xpath_config['name_index']).re(u'「.」')
        if not str_list:
            raise ValueError(str_list)
        # Drop the surrounding brackets, then any remaining whitespace.
        name_index = str_list[0].strip(u'「').strip(u'」').strip()
        if key == 'name_index_hiragana':
            mode = cnvk.KATA2HIRA
        else:
            mode = cnvk.HIRA2KATA
        converted = cnvk.convert(name_index, mode)
        name_count = len(self._hxs.select(self._xpath_config['name']).extract())
        return [converted] * name_count

    if key == 'image':
        image_urls = ['http://my.tokyo-hot.com%s' % image_url
                      for image_url in self._hxs.select(self._xpath_config[key]).extract()]
        return [{'name': get_image_name(image_url), 'original_url': image_url}
                for image_url in image_urls]

    raise KeyError(key)
def __getitem__(self, key):
    """Return the list of scraped values for ``key``.

    Supported keys:

    * ``'name'`` -- walks the ``'name_index'`` matches in order; the i-th
      match is paired with the i-th XPath in ``self._xpath_config['name']``.
      Returns every extracted name, stripped. As a side effect the
      per-name hiragana/katakana index lists are handed to
      ``self['name_index_hiragana'] = ...`` and
      ``self['name_index_katakana'] = ...``.
    * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- returns
      ``None``; nothing is computed here.
    * ``'image'`` -- one ``{'name': ..., 'original_url': ...}`` dict per
      extracted image path, each path prefixed with the site root.

    Raises:
        KeyError: if ``key`` is none of the keys above.
    """
    if key == 'name':
        name_xpaths = self._xpath_config[key]
        name_list = []
        name_index_hiragana_list = []
        name_index_katakana_list = []
        # Evaluate the index XPath once instead of once per iteration.
        indices = self._hxs.select(self._xpath_config['name_index']).extract()
        for position, name_index in enumerate(indices):
            str_list = self._hxs.select(name_xpaths[position]).extract()
            # A TypeError from cnvk now propagates with its own message
            # instead of being replaced by TypeError(<type 'list'>).
            name_index_hiragana_list += (
                [cnvk.convert(name_index, cnvk.KATA2HIRA)] * len(str_list))
            name_index_katakana_list += (
                [cnvk.convert(name_index, cnvk.HIRA2KATA)] * len(str_list))
            name_list += [s.strip() for s in str_list]
        self['name_index_hiragana'] = name_index_hiragana_list
        self['name_index_katakana'] = name_index_katakana_list
        return name_list

    if key in ('name_index_hiragana', 'name_index_katakana'):
        # NOTE(review): this accessor has always returned None; the lists are
        # only pushed through __setitem__ while 'name' is built. Confirm the
        # class serves the stored values some other way.
        return None

    if key == 'image':
        image_urls = ['http://www.caribbeancom.com%s' % image_url
                      for image_url in self._hxs.select(self._xpath_config[key]).extract()]
        return [{'name': get_image_name(image_url), 'original_url': image_url}
                for image_url in image_urls]

    raise KeyError(key)
def _merge_items(self):
    """Normalise ``self._item`` into a new dict and pass it to ``merge_items``.

    Key handling:

    * ``'actress_names'`` -> ``'actress_name'``: each value stripped.
    * ``'actress_name_kana'`` -> ``'actress_name_hiragana'`` and
      ``'actress_name_katakana'``: each value converted with ``cnvk``.
    * ``'actress_name_index'`` -> ``'actress_name_index_hiragana'`` and
      ``'actress_name_index_katakana'``: the UTF-8 decoded value converted
      with ``cnvk`` and repeated once per entry of
      ``self._item['actress_names']``; a falsy value is repeated unchanged.
    * ``'image_urls'`` -> ``'image_name'``: ``'<sha1 hex digest>.jpg'`` per URL.
    * any other key is copied as is.

    Returns:
        Whatever ``merge_items`` returns for the normalised dict.
    """
    new_item = {}
    for key, value in self._item.iteritems():
        if key == 'actress_names':
            new_item['actress_name'] = [v.strip() for v in value]
        elif key == 'actress_name_kana':
            new_item['actress_name_hiragana'] = [cnvk.convert(i, cnvk.KATA2HIRA) for i in value]
            new_item['actress_name_katakana'] = [cnvk.convert(i, cnvk.HIRA2KATA) for i in value]
        elif key == 'actress_name_index':
            if value:
                # Decode once and reuse for both conversions.
                name_index = value.decode('utf-8')
                hiragana = cnvk.convert(name_index, cnvk.KATA2HIRA)
                katakana = cnvk.convert(name_index, cnvk.HIRA2KATA)
            else:
                hiragana = katakana = value
            name_count = len(self._item['actress_names'])
            # Build two separate lists: the former chained assignment made
            # both keys share one list object, so changing one changed both.
            new_item['actress_name_index_hiragana'] = [hiragana] * name_count
            new_item['actress_name_index_katakana'] = [katakana] * name_count
        elif key == 'image_urls':
            new_item['image_name'] = ['%s.jpg' % hashlib.sha1(i).hexdigest() for i in value]
        else:
            new_item[key] = value
    return merge_items(new_item)
def __getitem__(self, key):
    """Return the list of scraped values for ``key``.

    ``self._xpath_config['name']`` is a sequence of XPaths; each extracted
    string is split on whitespace into a name and an optional kana reading.

    Supported keys:

    * ``'name'`` -- the first whitespace-separated token of every string.
    * ``'name_hiragana'`` / ``'name_katakana'`` -- for strings made of
      exactly two tokens, the second token converted with ``cnvk``;
      ``None`` for every other string.
    * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- the i-th
      ``'name_index'`` match converted with ``cnvk`` and repeated once per
      string extracted by the i-th name XPath.
    * ``'image'`` -- one ``{'name': ..., 'original_url': ...}`` dict per
      extracted image URL.

    Raises:
        KeyError: if ``key`` is none of the keys above.
        IndexError: for ``'name'`` when an extracted string has no tokens.
    """
    if key == 'name':
        name_list = []
        for xpath in self._xpath_config[key]:
            for name in self._hxs.select(xpath).extract():
                # split() already removes surrounding whitespace.
                name_list.append(name.split()[0])
        return name_list

    if key in ('name_hiragana', 'name_katakana'):
        mode = cnvk.KATA2HIRA if key == 'name_hiragana' else cnvk.HIRA2KATA
        name_kana_list = []
        for xpath in self._xpath_config['name']:
            for name in self._hxs.select(xpath).extract():
                parts = name.split()
                if len(parts) == 2:
                    name_kana_list.append(cnvk.convert(parts[1], mode))
                else:
                    # Keep the list aligned with 'name' when there is no reading.
                    name_kana_list.append(None)
        return name_kana_list

    if key in ('name_index_hiragana', 'name_index_katakana'):
        mode = cnvk.KATA2HIRA if key == 'name_index_hiragana' else cnvk.HIRA2KATA
        name_xpaths = self._xpath_config['name']
        name_index_kana_list = []
        # Evaluate the index XPath once instead of once per iteration.
        indices = self._hxs.select(self._xpath_config['name_index']).extract()
        for position, name_index in enumerate(indices):
            name_count = len(self._hxs.select(name_xpaths[position]).extract())
            # A TypeError from cnvk now propagates with its own message
            # instead of being replaced by TypeError(<type 'list'>).
            name_index_kana_list += [cnvk.convert(name_index, mode)] * name_count
        return name_index_kana_list

    if key == 'image':
        image_urls = self._hxs.select(self._xpath_config[key]).extract()
        return [{'name': get_image_name(image_url), 'original_url': image_url}
                for image_url in image_urls]

    raise KeyError(key)
def __getitem__(self, key):
    """Return the list of scraped values for ``key``.

    Supported keys:

    * ``'name'`` -- every string extracted by the ``'name'`` XPath, stripped.
    * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- the first
      ``'name_index'`` match, stripped, converted with ``cnvk`` and repeated
      once per extracted name. When the ``'name_index'`` XPath matches
      nothing, ``None`` is repeated instead.
    * ``'image'`` -- one ``{'name': ..., 'original_url': ...}`` dict per
      extracted image URL.

    Raises:
        KeyError: if ``key`` is none of the keys above.
    """
    if key == 'name':
        str_list = self._hxs.select(self._xpath_config[key]).extract()
        return [s.strip() for s in str_list]

    if key in ('name_index_hiragana', 'name_index_katakana'):
        str_list = self._hxs.select(self._xpath_config['name_index']).extract()
        if str_list:
            name_index = str_list[0].strip()
            if key == 'name_index_hiragana':
                mode = cnvk.KATA2HIRA
            else:
                mode = cnvk.HIRA2KATA
            filler = cnvk.convert(name_index, mode)
        else:
            # No index on the page: keep the result aligned with the names.
            filler = None
        name_count = len(self._hxs.select(self._xpath_config['name']).extract())
        return [filler] * name_count

    if key == 'image':
        image_urls = self._hxs.select(self._xpath_config[key]).extract()
        return [{'name': get_image_name(image_url), 'original_url': image_url}
                for image_url in image_urls]

    raise KeyError(key)
def __getitem__(self, key):
    """Return the list of scraped values for ``key``.

    Supported keys:

    * ``'name'`` -- every string extracted by the ``'name'`` XPath, stripped.
    * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- for each
      ``'name_index'`` match, the raw response body is searched for the
      section that starts at that index's ``<dt>`` and ends at the next
      index's ``<dt>`` (or at ``</dl>`` for the last one). The converted
      index is repeated once per ``alt="... />`` match in that section.
    * ``'image'`` -- one ``{'name': ..., 'original_url': ...}`` dict per
      extracted image path, each path prefixed with the site root.

    Raises:
        KeyError: if ``key`` is none of the keys above.
        IndexError: if an index's section is not found in the response body.
    """
    if key == 'name':
        return [name.strip()
                for name in self._hxs.select(self._xpath_config[key]).extract()]

    if key in ('name_index_hiragana', 'name_index_katakana'):
        mode = cnvk.KATA2HIRA if key == 'name_index_hiragana' else cnvk.HIRA2KATA
        name_index_kana_list = []
        indices = self._hxs.select(self._xpath_config['name_index']).extract()
        last_position = len(indices) - 1
        for position, index in enumerate(indices):
            # The index text comes from the page, so escape it before it
            # becomes part of a regular expression.
            start = re.escape(index.encode('utf-8'))
            if position == last_position:
                pattern = re.compile(r'<dt>%s</dt>[\S\s]*</dl>' % start)
            else:
                end = re.escape(indices[position + 1].encode('utf-8'))
                pattern = re.compile(r'<dt>%s</dt>[\S\s]*<dt>%s</dt>' % (start, end))
            # NOTE(review): "[\S\s]*" is greedy, so the section runs to the
            # LAST occurrence of the end marker in the body -- confirm that
            # each marker occurs only once.
            section = pattern.findall(self._response._body)[0]
            # Only the number of entries is needed, not their text.
            actress_count = len(re.findall(r'alt=".* />', section))
            name_index_kana_list += [cnvk.convert(index, mode)] * actress_count
        return name_index_kana_list

    if key == 'image':
        image_urls = ['http://www.heyzo.com%s' % image_url
                      for image_url in self._hxs.select(self._xpath_config[key]).extract()]
        return [{'name': get_image_name(image_url), 'original_url': image_url}
                for image_url in image_urls]

    raise KeyError(key)
# _to_sqlalchemy(self, item): persist one scraped item as a site "title" row.
#
# What the statements below do, in order:
#   * query self._title_cls joined to self._ev_title_cls for
#     item['original_id']; when nothing is found, build a title from
#     item.to_title()
#   * call self._set_relationships(item, title) and self._get_ev_title(item),
#     then assign the result of the latter to title.ev_title
#   * for each name in item['categories'] not already in title.categories_ap,
#     look the category up by exact name, then by its HIRA2KATA / KATA2HIRA
#     variants, and append it to title.categories when found
#   * for each name in item['actresses'], load the ev_actress row with
#     .one(), load or create the site actress, attach face images from
#     item['images']['face'], and append the actress to title.actresses when
#     the name is not in title.actresses_ap
#   * for each image in item['images'] whose name is not in title.images_ap,
#     load the matching ev_title_image with .one() and append a new
#     title image to title.images
#   * session.add(title) and session.commit()
#
# NOTE(review): this definition has lost its line breaks and indentation, so
# the statement nesting cannot be read from the text. In particular, confirm
# whether _set_relationships() runs for every title or only for newly built
# ones, and whether the face-image loop belongs to the "except NoResultFound"
# branch (the inline note says images are added only when the actress does
# not exist yet).
# NOTE(review): the fallback ev_actress_image query ends in .first(), which
# can return None; the following ".site_actress_image" access would then
# raise AttributeError -- confirm an ev_actress always has an image here.
# The Japanese note inside the commented-out block asks "Is this needed?".
def _to_sqlalchemy(self, item): #ev_title = self._get_ev_title(item) #from erovideo.models.x1x.x1x_title import X1XJPTitle title = session.query(self._title_cls).\ join(self._ev_title_cls).\ filter(self._ev_title_cls.original_id==item['original_id']).\ first() if not title: #title = self._title_cls(**self._to_insert(item)) title = self._title_cls(**item.to_title()) #title = self._set_relationships(item, title) title = self._set_relationships(item, title) ev_title = self._get_ev_title(item) title.ev_title = ev_title """ for categories """ if item['categories']: for c in item['categories']: """ Do nothing if title already has the category """ if not c in title.categories_ap: #if not c in title.categories_ap or not cnvk.convert(c, cnvk.HIRA2KATA) in title.categories_ap or not cnvk.convert(c, cnvk.KATA2HIRA): category = session.query(self._category_cls).\ filter_by(name=c).\ first() if not category: category = session.query(self._category_cls).\ filter((self._category_cls.name==cnvk.convert(c, cnvk.HIRA2KATA)) | \ (self._category_cls.name==cnvk.convert(c, cnvk.KATA2HIRA))).\ first() #if not category: # raise NoResultFound if category: title.categories.append(category) #title.categories.append(category) """ for actresses """ if item['actresses']: for a in item['actresses']: """ ev_actress should be in the ev_actress table because it was already inserted in _get_ev_title if it's not existed (ev_actress_image too) """ ev_actress = session.query(self._ev_actress_cls).\ filter_by(name=a).\ one() """ Do nothing if there is the actress in actress_table """ # if not a in title.actresses_ap: # try: # actress = session.query(self._actress_cls).\ # join(self._ev_actress_cls).\ # filter(self._ev_actress_cls.name==a).\ # one() # except NoResultFound: # #ev_actress = session.query(self._ev_actress_cls).\ # # filter_by(name=a).\ # # one() # actress = self._actress_cls() # actress.ev_actress = ev_actress # for image in item['images']['face']: # if not image in actress.images_ap: # """ 
Don't need to check if ev_actress_image exists # if image is not in actress.images_ap # because relationship for actress and actress_image is one to many """ # ev_actress_image = session.query(self._ev_actress_image_cls).\ # filter_by(ev_actress_id=ev_actress.ev_actress_id).\ # filter_by(name=image).\ # one() # actress_image = self._actress_image_cls() # actress_image.ev_actress_image = ev_actress_image # actress.images.append(actress_image) # """ これは必要? """ # session.add(actress) # #session.commit() # title.actresses.append(actress) try: actress = session.query(self._actress_cls).\ join(self._ev_actress_cls).\ filter(self._ev_actress_cls.name==a).\ one() except NoResultFound: actress = self._actress_cls() actress.ev_actress = ev_actress """ Add images only when actress does not exist """ #if 'face' in item['images']: if item['images']['face']: for image in item['images']['face']: """ Doesn't need to be checked if ev_actress_image exists if image is not in actress.images_ap because relationship for actress and actress_image is one to many """ try: ev_actress_image = session.query(self._ev_actress_image_cls).\ filter_by(ev_actress_id=ev_actress.ev_actress_id).\ filter_by(name=image['name']).\ one() except NoResultFound: ev_actress_image = session.query(self._ev_actress_image_cls).\ filter_by(ev_actress_id=ev_actress.ev_actress_id).\ first() if not ev_actress_image.site_actress_image: """ When """ actress_image = self._actress_image_cls() actress_image.ev_actress_image = ev_actress_image else: actress_image = ev_actress_image.site_actress_image #session.add(actress_image) #session.commit() actress.images.append(actress_image) """ Not sure if the following "add" is needed """ session.add(actress) # for image in item['images']['face']: # if not image in actress.images_ap: # ev_actress_image = session.query(self._ev_actress_image_cls).\ # filter_by(ev_actress_id=ev_actress.ev_actress_id).\ # filter_by(name=image['name']).\ # one() # actress_image = 
self._actress_image_cls() # actress_image.ev_actress_image = ev_actress_image # actress.images.append(actress_image) #""" Not sure if the following "add" is needed """ #session.add(actress) if not a in title.actresses_ap: title.actresses.append(actress) """ for title_images """ for image_type, images in item['images'].iteritems(): if images: for image in images: if not image['name'] in title.images_ap: ev_title_image = session.query(self._ev_title_image_cls).\ filter_by(ev_title_id=ev_title.ev_title_id).\ filter_by(name=image['name']).\ filter_by(image_type=image_type).\ one() title_image = self._title_image_cls() title_image.ev_title_image = ev_title_image title.images.append(title_image) session.add(title) session.commit()
def _get_ev_title(self, item):
    """Fetch or create the ev_title row for ``item`` and attach its relations.

    The row is looked up by ``item['original_id']``; when none exists a new
    one is built from ``item.to_ev_title()``. Categories, actresses (with
    face images for newly created actresses) and title images from ``item``
    are attached, then the row is added to the session and committed.

    Args:
        item: scraped item; read via ``item['original_id']``,
            ``item['categories']``, ``item['actresses']``,
            ``item['images']`` and ``item.to_ev_title()``.

    Returns:
        The persisted ev_title object.

    Raises:
        MultipleResultsFound: may propagate from the ``.one()`` queries
            (only ``NoResultFound`` is handled here).
    """
    ev_title = session.query(self._ev_title_cls).\
        filter_by(original_id=item['original_id']).\
        first()
    if not ev_title:
        ev_title = self._ev_title_cls(**(item.to_ev_title()))

    # --- categories ---
    if item['categories']:
        for category_name in item['categories']:
            katakana_name = cnvk.convert(category_name, cnvk.HIRA2KATA)
            hiragana_name = cnvk.convert(category_name, cnvk.KATA2HIRA)
            # Skip when ev_title already has the category under ANY kana
            # spelling. (The former "not a or not b or not c" test was true
            # whenever a single spelling was missing, so kana names were
            # never skipped.)
            attached = ev_title.categories_ap
            if (category_name in attached
                    or katakana_name in attached
                    or hiragana_name in attached):
                continue
            try:
                ev_category = session.query(self._ev_category_cls).\
                    filter_by(name=category_name).\
                    one()
            except NoResultFound:
                if len(category_name) != 1:
                    # The same word may be stored with a different kana
                    # spelling (hiragana vs. katakana). The filter formerly
                    # compared self._ev_actress_cls -- a class -- with the
                    # string, which can never match a category name.
                    ev_category = session.query(self._ev_category_cls).\
                        filter((self._ev_category_cls.name == katakana_name) |
                               (self._ev_category_cls.name == hiragana_name)).\
                        first()
                    if not ev_category:
                        # Last resort: first category (lowest id) whose name
                        # contains any spelling of the word.
                        ev_category = session.query(self._ev_category_cls).\
                            filter((self._ev_category_cls.name.contains(category_name)) |
                                   (self._ev_category_cls.name.contains(katakana_name)) |
                                   (self._ev_category_cls.name.contains(hiragana_name))).\
                            order_by(self._ev_category_cls.ev_category_id).\
                            first()
                else:
                    # A one-character name with no exact match is treated as
                    # not being in the database; no fuzzy lookup is tried.
                    ev_category = None
            if ev_category:
                ev_title.categories.append(ev_category)

    # --- actresses ---
    if item['actresses']:
        for actress_name in item['actresses']:
            try:
                ev_actress = session.query(self._ev_actress_cls).\
                    filter_by(name=actress_name).\
                    one()
            except NoResultFound:
                ev_actress = self._ev_actress_cls(name=actress_name)
                # NOTE(review): nesting reconstructed from collapsed source.
                # Face images are attached only to a newly created actress
                # (the loop has no duplicate check) -- confirm.
                if item['images']['face']:
                    for image in item['images']['face']:
                        ev_actress_image = self._ev_actress_image_cls(
                            name=image['name'],
                            original_url=image['original_url'])
                        ev_actress.images.append(ev_actress_image)
            # The original author was unsure whether this add() is needed.
            session.add(ev_actress)
            if actress_name not in ev_title.actresses_ap:
                ev_title.actresses.append(ev_actress)

    # --- title images ---
    for image_type, images in item['images'].iteritems():
        if images:
            for image in images:
                # Do nothing if ev_title already has the image.
                if image['name'] not in ev_title.images_ap:
                    ev_title_image = self._ev_title_image_cls(
                        name=image['name'],
                        original_url=image['original_url'],
                        image_type=image_type)
                    ev_title.images.append(ev_title_image)

    session.add(ev_title)
    session.commit()
    return ev_title
# _to_sqlalchemy(self, item): persist one scraped item as a site "title" row.
#
# What the statements below do, in order:
#   * query self._title_cls joined to self._ev_title_cls for
#     item['original_id'] with .one(); on NoResultFound build a title from
#     item.to_title()
#   * call self._set_relationships(item, title) and self._get_ev_title(item)
#   * assign title.ev_title; if that raises, print title.site_id,
#     item['original_id'] and self._title_cls between "### debug ###"
#     markers and re-raise
#   * for each name in item['categories'] not already in title.categories_ap,
#     look the category up by exact name, then by its HIRA2KATA / KATA2HIRA
#     variants, and append it to title.categories when found
#   * for each name in item['actresses'], load the ev_actress row with
#     .one(), load or create the site actress, attach every face image whose
#     name is not in actress.images_ap, session.add(actress), and append the
#     actress to title.actresses when the name is not in title.actresses_ap
#   * for each image in item['images'] whose name is not in title.images_ap,
#     load the matching ev_title_image with .one() and append a new
#     title image to title.images
#   * session.add(title) and session.commit()
#
# English for the Japanese note in the actress section: "Considering the case
# where the actress has no face image, check actress and actress_image first,
# and only then check whether the title object has the actress."
#
# NOTE(review): this definition has lost its line breaks and indentation, so
# the statement nesting cannot be read from the text. In particular, confirm
# whether _set_relationships() runs for every title or only inside the
# "except NoResultFound" branch.
# NOTE(review): the debug output uses Python 2 print statements, and
# "raise e" discards the original traceback on Python 2; a bare "raise"
# would keep it.
def _to_sqlalchemy(self, item): #ev_title = self._get_ev_title(item) #from erovideo.models.x1x.x1x_title import X1XJPTitle #title = session.query(self._title_cls).\ # join(self._ev_title_cls).\ # filter(self._ev_title_cls.original_id==item['original_id']).\ # first() #if not title: #title = self._title_cls(**self._to_insert(item)) # title = self._title_cls(**item.to_title()) #title = self._set_relationships(item, title) try: title = session.query(self._title_cls).\ join(self._ev_title_cls).\ filter(self._ev_title_cls.original_id==item['original_id']).\ one() except NoResultFound: title = self._title_cls(**item.to_title()) title = self._set_relationships(item, title) ev_title = self._get_ev_title(item) try: title.ev_title = ev_title except Exception as e: print '### debug ###' print title.site_id print item['original_id'] print self._title_cls print '### debug ###' raise e """ for categories """ if item['categories']: for c in item['categories']: """ Do nothing if title already has the category """ if not c in title.categories_ap: #if not c in title.categories_ap or not cnvk.convert(c, cnvk.HIRA2KATA) in title.categories_ap or not cnvk.convert(c, cnvk.KATA2HIRA): category = session.query(self._category_cls).\ filter_by(name=c).\ first() if not category: category = session.query(self._category_cls).\ filter((self._category_cls.name==cnvk.convert(c, cnvk.HIRA2KATA)) | \ (self._category_cls.name==cnvk.convert(c, cnvk.KATA2HIRA))).\ first() #if not category: # raise NoResultFound if category: title.categories.append(category) #title.categories.append(category) """ for actresses """ if item['actresses']: for a in item['actresses']: """ ev_actress should be in the ev_actress table because it was already inserted in _get_ev_title if it's not existed (ev_actress_image too) """ ev_actress = session.query(self._ev_actress_cls).\ filter_by(name=a).\ one() #""" Do nothing if there is the actress in actress_table """ """ Need to check actress and actress_image before checking if 
the title has them because actress might not have the images. actressがface画像を持っていない場合の事を考えて、 初めにactressとactress_imageをチェックしてから titleオブジェクトがactressを持っているか否を確認する。 """ try: actress = session.query(self._actress_cls).\ join(self._ev_actress_cls).\ filter(self._ev_actress_cls.name==a).\ one() except NoResultFound: actress = self._actress_cls() actress.ev_actress = ev_actress #""" Add images only when actress does not exist """ #if 'face' in item['images']: # if item['images']['face']: # for image in item['images']['face']: # if not image['name'] in actress.images_ap: # """ Doesn't need to be checked if ev_actress_image exists # if image is not in actress.images_ap # because relationship for actress and actress_image is one to many """ # try: # ev_actress_image = session.query(self._ev_actress_image_cls).\ # filter_by(ev_actress_id=ev_actress.ev_actress_id).\ # filter_by(name=image['name']).\ # one() # except NoResultFound: # ev_actress_image = session.query(self._ev_actress_image_cls).\ # filter_by(ev_actress_id=ev_actress.ev_actress_id).\ # first() # actress_image = self._actress_image_cls() # actress_image.ev_actress_image = ev_actress_image # actress.images.append(actress_image) # """ Not sure if the following "add" is needed # => Needed to commit actress object # """ # session.add(actress) if item['images']['face']: for image in item['images']['face']: if not image['name'] in actress.images_ap: """ ev_actress_image should be one because it's created in _get_ev_title() """ ev_actress_image = session.query(self._ev_actress_image_cls).\ filter_by(ev_actress_id=ev_actress.ev_actress_id).\ filter_by(name=image['name']).\ one() actress_image = self._actress_image_cls() actress_image.ev_actress_image = ev_actress_image actress.images.append(actress_image) """ Not sure if the following "add" is needed => Needed to commit actress object """ session.add(actress) if not a in title.actresses_ap: title.actresses.append(actress) """ for title_images """ for image_type, images in 
item['images'].iteritems(): if images: for image in images: if not image['name'] in title.images_ap: ev_title_image = session.query(self._ev_title_image_cls).\ filter_by(ev_title_id=ev_title.ev_title_id).\ filter_by(name=image['name']).\ filter_by(image_type=image_type).\ one() title_image = self._title_image_cls() title_image.ev_title_image = ev_title_image title.images.append(title_image) session.add(title) session.commit()
def _get_ev_title(self, item):
    """Fetch or create the ev_title row for ``item`` and attach its relations.

    The row is looked up by ``item['original_id']`` AND
    ``item['original_url']`` (the same original id may exist on another
    site); when none exists a new one is built from ``item.to_ev_title()``.
    Categories, actresses, actress face images and title images from
    ``item`` are attached, then the row is added to the session and
    committed.

    Args:
        item: scraped item; read via ``item['original_id']``,
            ``item['original_url']``, ``item['categories']``,
            ``item['actresses']``, ``item['images']`` and
            ``item.to_ev_title()``.

    Returns:
        The persisted ev_title object.

    Raises:
        MultipleResultsFound: may propagate from the ``.one()`` queries
            (only ``NoResultFound`` is handled here).
    """
    try:
        ev_title = session.query(self._ev_title_cls).\
            filter_by(original_id=item['original_id']).\
            filter_by(original_url=item['original_url']).\
            one()
    except NoResultFound:
        ev_title = self._ev_title_cls(**(item.to_ev_title()))

    # --- categories ---
    if item['categories']:
        for category_name in item['categories']:
            katakana_name = cnvk.convert(category_name, cnvk.HIRA2KATA)
            hiragana_name = cnvk.convert(category_name, cnvk.KATA2HIRA)
            # Skip when ev_title already has the category under ANY kana
            # spelling. (The former "not a or not b or not c" test was true
            # whenever a single spelling was missing, so kana names were
            # never skipped.)
            attached = ev_title.categories_ap
            if (category_name in attached
                    or katakana_name in attached
                    or hiragana_name in attached):
                continue
            try:
                ev_category = session.query(self._ev_category_cls).\
                    filter_by(name=category_name).\
                    one()
            except NoResultFound:
                if len(category_name) != 1:
                    # The same word may be stored with a different kana
                    # spelling (hiragana vs. katakana). The filter formerly
                    # compared self._ev_actress_cls -- a class -- with the
                    # string, which can never match a category name.
                    ev_category = session.query(self._ev_category_cls).\
                        filter((self._ev_category_cls.name == katakana_name) |
                               (self._ev_category_cls.name == hiragana_name)).\
                        first()
                    if not ev_category:
                        # Last resort: first category (lowest id) whose name
                        # contains any spelling of the word.
                        ev_category = session.query(self._ev_category_cls).\
                            filter((self._ev_category_cls.name.contains(category_name)) |
                                   (self._ev_category_cls.name.contains(katakana_name)) |
                                   (self._ev_category_cls.name.contains(hiragana_name))).\
                            order_by(self._ev_category_cls.ev_category_id).\
                            first()
                else:
                    # A one-character name with no exact match is treated as
                    # not being in the database; no fuzzy lookup is tried.
                    ev_category = None
            if ev_category:
                ev_title.categories.append(ev_category)

    # --- actresses ---
    if item['actresses']:
        for actress_name in item['actresses']:
            # The actress and her images are handled before checking whether
            # ev_title already has her, because an existing actress may still
            # be missing the face images of this title.
            try:
                ev_actress = session.query(self._ev_actress_cls).\
                    filter_by(name=actress_name).\
                    one()
            except NoResultFound:
                ev_actress = self._ev_actress_cls(name=actress_name)

            # Add the face images of every new title to the actress.
            # (Original note: ev_actress.images is ordered newest first --
            # not verifiable from this function.)
            if item['images']['face']:
                for image in item['images']['face']:
                    if image['name'] not in ev_actress.images_ap:
                        ev_actress_image = self._ev_actress_image_cls(
                            name=image['name'],
                            original_url=image['original_url'])
                        ev_actress.images.append(ev_actress_image)

            # Needed (per the original note): add() makes sure the actress is
            # inserted or updated at the next commit() even when the check
            # below does not attach her to ev_title.
            session.add(ev_actress)
            if actress_name not in ev_title.actresses_ap:
                ev_title.actresses.append(ev_actress)

    # --- title images ---
    for image_type, images in item['images'].iteritems():
        if images:
            for image in images:
                # Do nothing if ev_title already has the image.
                if image['name'] not in ev_title.images_ap:
                    ev_title_image = self._ev_title_image_cls(
                        name=image['name'],
                        original_url=image['original_url'],
                        image_type=image_type)
                    ev_title.images.append(ev_title_image)

    session.add(ev_title)
    session.commit()
    return ev_title