Ejemplo n.º 1
0
 def __getitem__(self, key):
     """Look up one scraped field of the listing page by name.

     Supported keys:

     * ``'name'`` -- the strings selected by the ``name`` XPath, stripped.
     * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- the second
       character of the first ``name_index`` match, passed through
       ``cnvk.convert`` and repeated once per name.  When the
       ``name_index`` XPath matches nothing, a list of ``None`` of the
       same length is returned instead.
     * ``'image'`` -- one ``{'name', 'original_url'}`` dict per image URL.

     Raises ``KeyError`` for any other key.
     """
     if key == 'name':
         return [raw.strip()
                 for raw in self._hxs.select(self._xpath_config[key]).extract()]

     if key == 'image':
         return [{'name': get_image_name(url), 'original_url': url}
                 for url in self._hxs.select(self._xpath_config[key]).extract()]

     if key not in ('name_index_hiragana', 'name_index_katakana'):
         raise KeyError(key)

     matches = self._hxs.select(self._xpath_config['name_index']).extract()
     if not matches:
         # No kana index on the page: one None placeholder per name.
         return [None] * len(self['name'])

     # Index [1] picks the second character of the first match.
     index_char = matches[0][1]
     if key == 'name_index_hiragana':
         converted = cnvk.convert(index_char, cnvk.KATA2HIRA)
     else:
         converted = cnvk.convert(index_char, cnvk.HIRA2KATA)
     return [converted] * len(self['name'])
 def __getitem__(self, key):
     """Return one scraped field of the listing page.

     Supported keys:

     * ``'name'`` -- the strings selected by the ``name`` XPath, stripped.
     * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- the
       character found between ``「`` and ``」`` in the ``name_index``
       selection, passed through ``cnvk.convert`` and repeated once per
       ``name`` match.
     * ``'image'`` -- one ``{'name', 'original_url'}`` dict per image path,
       with the path appended to ``http://my.tokyo-hot.com``.

     Raises:
         KeyError: for any other key.
         ValueError: when the ``name_index`` selection contains no
             bracketed character.
     """
     if key == 'name':
         str_list = self._hxs.select(self._xpath_config[key]).extract()
         return [s.strip() for s in str_list]
     elif key == 'name_index_hiragana' or key == 'name_index_katakana':
         # u'' rather than the Python-2-only ur'' prefix: the pattern holds
         # no backslashes, so both spell the same string, and u'' also
         # parses on Python 3.
         matches = self._hxs.select(self._xpath_config['name_index']).re(u'「.」')
         if not matches:
             raise ValueError(matches)
         name_index = matches[0].strip(u'「').strip(u'」').strip()
         if key == 'name_index_hiragana':
             mode = cnvk.KATA2HIRA
         else:
             mode = cnvk.HIRA2KATA
         converted = cnvk.convert(name_index, mode)
         name_count = len(self._hxs.select(self._xpath_config['name']).extract())
         return [converted] * name_count
     elif key == 'image':
         # The page yields site-relative paths; make them absolute.
         image_urls = ['http://my.tokyo-hot.com%s' % image_url
                       for image_url in self._hxs.select(self._xpath_config[key]).extract()]
         return [{'name': get_image_name(image_url), 'original_url': image_url}
                 for image_url in image_urls]
     else:
         raise KeyError(key)
Ejemplo n.º 3
0
 def __getitem__(self, key):
     """Return one scraped field of the listing page.

     Supported keys:

     * ``'name'`` -- walks the ``name_index`` headings; for heading number
       ``n`` it extracts the strings matched by the ``n``-th ``name``
       XPath, strips them, and collects them into one flat list.  As a
       side effect, each heading converted with ``cnvk.convert`` is
       repeated once per name of that heading and the two resulting lists
       are assigned to ``self['name_index_hiragana']`` and
       ``self['name_index_katakana']``.
     * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- return
       ``None`` (see the note in that branch).
     * ``'image'`` -- one ``{'name', 'original_url'}`` dict per image path,
       with the path appended to ``http://www.caribbeancom.com``.

     Raises:
         KeyError: for any other key.
         TypeError: when ``cnvk.convert`` raises ``TypeError``.
     """
     if key == 'name':
         name_list = []
         name_index_hiragana_list = []
         name_index_katakana_list = []
         # Query the index headings once, outside the loop.
         name_indices = self._hxs.select(self._xpath_config['name_index']).extract()
         for position, name_index in enumerate(name_indices):
             # 'name' is configured as a list of XPaths, one per heading.
             str_list = self._hxs.select(self._xpath_config[key][position]).extract()
             try:
                 hiragana = cnvk.convert(name_index, cnvk.KATA2HIRA)
                 katakana = cnvk.convert(name_index, cnvk.HIRA2KATA)
             except TypeError:
                 raise TypeError(type(name_index_hiragana_list))
             name_index_hiragana_list += [hiragana] * len(str_list)
             name_index_katakana_list += [katakana] * len(str_list)
             name_list += [s.strip() for s in str_list]
         self['name_index_hiragana'] = name_index_hiragana_list
         self['name_index_katakana'] = name_index_katakana_list
         return name_list
     elif key == 'name_index_hiragana' or key == 'name_index_katakana':
         # NOTE(review): this branch has always returned None; the lists
         # assigned by the 'name' branch are not read back here.  It looks
         # unfinished -- confirm against the class's __setitem__ before
         # changing it.
         return None
     elif key == 'image':
         # The page yields site-relative paths; make them absolute.
         image_urls = ['http://www.caribbeancom.com%s' % image_url
                       for image_url in self._hxs.select(self._xpath_config[key]).extract()]
         return [{'name': get_image_name(image_url), 'original_url': image_url}
                 for image_url in image_urls]
     else:
         raise KeyError(key)
Ejemplo n.º 4
0
    def _merge_items(self):
        """Rename and normalise the fields of ``self._item`` for ``merge_items``.

        Field handling:

        * ``'actress_names'`` -> ``'actress_name'``, each entry stripped.
        * ``'actress_name_kana'`` -> ``'actress_name_hiragana'`` and
          ``'actress_name_katakana'``, each entry run through
          ``cnvk.convert``.
        * ``'actress_name_index'`` -> ``'actress_name_index_hiragana'`` and
          ``'actress_name_index_katakana'``, one entry per element of
          ``self._item['actress_names']``.  A truthy value is UTF-8 decoded
          and converted; a falsy value is repeated unchanged.
        * ``'image_urls'`` -> ``'image_name'``, ``'<sha1 hex digest>.jpg'``
          per URL.
        * Every other field is copied as is.

        Returns:
            Whatever ``merge_items`` returns for the rebuilt dict.
        """
        new_item = {}
        for key, value in self._item.iteritems():
            if key == 'actress_names':
                new_item['actress_name'] = [v.strip() for v in value]
            elif key == 'actress_name_kana':
                new_item['actress_name_hiragana'] = [cnvk.convert(i, cnvk.KATA2HIRA) for i in value]
                new_item['actress_name_katakana'] = [cnvk.convert(i, cnvk.HIRA2KATA) for i in value]
            elif key == 'actress_name_index':
                if value:
                    index_char = value.decode('utf-8')
                    hiragana = cnvk.convert(index_char, cnvk.KATA2HIRA)
                    katakana = cnvk.convert(index_char, cnvk.HIRA2KATA)
                else:
                    hiragana = katakana = value
                name_count = len(self._item['actress_names'])
                # Build two separate lists so that an in-place change to one
                # field can never leak into the other.
                new_item['actress_name_index_hiragana'] = [hiragana] * name_count
                new_item['actress_name_index_katakana'] = [katakana] * name_count
            elif key == 'image_urls':
                new_item['image_name'] = ['%s.jpg' % hashlib.sha1(i).hexdigest() for i in value]
            else:
                new_item[key] = value

        return merge_items(new_item)
Ejemplo n.º 5
0
 def __getitem__(self, key):
     """Return one scraped field of the listing page.

     Supported keys:

     * ``'name'`` -- the first whitespace-separated token of every string
       matched by each XPath in the ``name`` list.
     * ``'name_hiragana'`` / ``'name_katakana'`` -- for strings made of
       exactly two tokens, the second token passed through
       ``cnvk.convert``; ``None`` for every other string.
     * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- for heading
       number ``n`` of the ``name_index`` selection, its converted text
       repeated once per string matched by the ``n``-th ``name`` XPath.
     * ``'image'`` -- one ``{'name', 'original_url'}`` dict per image URL.

     Raises ``KeyError`` for any other key, and ``TypeError`` when
     ``cnvk.convert`` raises ``TypeError`` for an index heading.
     """
     if key == 'name':
         first_tokens = []
         for xpath in self._xpath_config[key]:
             for raw in self._hxs.select(xpath).extract():
                 first_tokens.append(raw.split()[0])
         return first_tokens

     if key == 'name_hiragana' or key == 'name_katakana':
         kana_list = []
         for xpath in self._xpath_config['name']:
             for raw in self._hxs.select(xpath).extract():
                 tokens = raw.split()
                 if len(tokens) != 2:
                     # No separate kana reading on this entry.
                     kana_list.append(None)
                 elif key == 'name_hiragana':
                     kana_list.append(cnvk.convert(tokens[1], cnvk.KATA2HIRA))
                 else:
                     kana_list.append(cnvk.convert(tokens[1], cnvk.HIRA2KATA))
         return kana_list

     if key == 'name_index_hiragana' or key == 'name_index_katakana':
         index_kana_list = []
         headings = self._hxs.select(self._xpath_config['name_index']).extract()
         for position, heading in enumerate(headings):
             names_here = self._hxs.select(self._xpath_config['name'][position]).extract()
             try:
                 if key == 'name_index_hiragana':
                     converted = cnvk.convert(heading, cnvk.KATA2HIRA)
                 else:
                     converted = cnvk.convert(heading, cnvk.HIRA2KATA)
             except TypeError:
                 raise TypeError(type(index_kana_list))
             index_kana_list.extend([converted] * len(names_here))
         return index_kana_list

     if key == 'image':
         return [{'name': get_image_name(url), 'original_url': url}
                 for url in self._hxs.select(self._xpath_config[key]).extract()]

     raise KeyError(key)
Ejemplo n.º 6
0
 def __getitem__(self, key):
     """Return one scraped field of the listing page.

     Supported keys:

     * ``'name'`` -- the strings selected by the ``name`` XPath, stripped.
     * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- the first
       ``name_index`` match, stripped and passed through ``cnvk.convert``,
       repeated once per ``name`` match.  When the ``name_index`` XPath
       matches nothing, ``None`` is repeated instead.
     * ``'image'`` -- one ``{'name', 'original_url'}`` dict per image URL.

     Raises ``KeyError`` for any other key.
     """
     if key == 'name':
         extracted = self._hxs.select(self._xpath_config[key]).extract()
         return [text.strip() for text in extracted]

     if key == 'image':
         extracted = self._hxs.select(self._xpath_config[key]).extract()
         return [{'name': get_image_name(url), 'original_url': url}
                 for url in extracted]

     if key != 'name_index_hiragana' and key != 'name_index_katakana':
         raise KeyError(key)

     found = self._hxs.select(self._xpath_config['name_index']).extract()
     if found:
         index_text = found[0].strip()
         if key == 'name_index_hiragana':
             filler = cnvk.convert(index_text, cnvk.KATA2HIRA)
         else:
             filler = cnvk.convert(index_text, cnvk.HIRA2KATA)
     else:
         # No kana index on the page: fill with None placeholders.
         filler = None

     name_count = len(self._hxs.select(self._xpath_config['name']).extract())
     return [filler] * name_count
Ejemplo n.º 7
0
 def __getitem__(self, key):
     """Return one scraped field of the listing page.

     Supported keys:

     * ``'name'`` -- the strings selected by the ``name`` XPath, stripped.
     * ``'name_index_hiragana'`` / ``'name_index_katakana'`` -- for each
       ``name_index`` heading, the heading converted with ``cnvk.convert``
       and repeated once per ``alt="... />`` fragment found in the raw
       response body between that heading's ``<dt>`` element and the next
       heading's ``<dt>`` element (``</dl>`` for the last heading).
     * ``'image'`` -- one ``{'name', 'original_url'}`` dict per image path,
       with the path appended to ``http://www.heyzo.com``.

     Raises ``KeyError`` for any other key, ``TypeError`` when
     ``cnvk.convert`` raises ``TypeError``, and ``IndexError`` when a
     heading's section pattern does not match the response body.
     """
     if key == 'name':
         extracted = self._hxs.select(self._xpath_config[key]).extract()
         return [text.strip() for text in extracted]

     if key == 'name_index_hiragana' or key == 'name_index_katakana':
         kana_per_actress = []
         headings = self._hxs.select(self._xpath_config['name_index']).extract()
         final_position = len(headings) - 1
         for position, heading in enumerate(headings):
             # NOTE(review): [\S\s]* is greedy, so each section runs to the
             # last occurrence of its closing marker in the body.
             if position == final_position:
                 section_re = re.compile(r'<dt>%s</dt>[\S\s]*</dl>' % heading.encode('utf-8'))
             else:
                 following = headings[position + 1]
                 section_re = re.compile(r'<dt>%s</dt>[\S\s]*<dt>%s</dt>' % (heading.encode('utf-8'),
                                                                          following.encode('utf-8')))
             section = re.findall(section_re, self._response._body)[0]
             actresses = [re.sub('alt="|" />', '', fragment)
                          for fragment in re.findall(r'alt=".* />', section)]
             try:
                 if key == 'name_index_hiragana':
                     converted = cnvk.convert(heading, cnvk.KATA2HIRA)
                 else:
                     converted = cnvk.convert(heading, cnvk.HIRA2KATA)
             except TypeError:
                 raise TypeError(type(kana_per_actress))
             kana_per_actress.extend([converted] * len(actresses))
         return kana_per_actress

     if key == 'image':
         # The page yields site-relative paths; make them absolute.
         absolute_urls = ['http://www.heyzo.com%s' % path
                          for path in self._hxs.select(self._xpath_config[key]).extract()]
         return [{'name': get_image_name(url), 'original_url': url}
                 for url in absolute_urls]

     raise KeyError(key)
    def _to_sqlalchemy(self, item):
        """Persist ``item`` as a title row and link its related rows.

        Steps, in order:

        1. Look up the title joined to an ev_title whose ``original_id``
           equals ``item['original_id']``; build a new one from
           ``item.to_title()`` when none is found.
        2. Pass the title through ``self._set_relationships`` and attach
           the ev_title returned by ``self._get_ev_title``.
        3. Append every category of ``item['categories']`` that is not yet
           in ``title.categories_ap`` and for which a category row exists.
        4. For every name in ``item['actresses']``, fetch the actress (or
           create it, together with its face images) and append it to the
           title when it is not yet in ``title.actresses_ap``.
        5. Append a title image for every image of ``item['images']`` whose
           name is not yet in ``title.images_ap``.
        6. Add the title to the session and commit.

        Rows fetched with ``.one()`` outside a ``try`` block (the
        ev_actress and the ev_title_image) are required to exist; the
        resulting exception is not handled here.
        """
        title = session.query(self._title_cls).\
                    join(self._ev_title_cls).\
                    filter(self._ev_title_cls.original_id==item['original_id']).\
                    first()

        if not title:
            # No stored title matches this original_id: build a new one.
            title = self._title_cls(**item.to_title())

        title = self._set_relationships(item, title)

        ev_title = self._get_ev_title(item)
        title.ev_title = ev_title

        """ for categories """
        if item['categories']:
            for c in item['categories']:
                """ Do nothing if title already has the category """
                if not c in title.categories_ap:
                    # Try the exact name first.
                    category = session.query(self._category_cls).\
                                    filter_by(name=c).\
                                    first()

                    if not category:
                        # Fall back to the name with its kana converted in
                        # either direction.
                        category = session.query(self._category_cls).\
                                         filter((self._category_cls.name==cnvk.convert(c, cnvk.HIRA2KATA)) | \
                                                (self._category_cls.name==cnvk.convert(c, cnvk.KATA2HIRA))).\
                                         first()

                    # A category without any matching row is skipped.
                    if category:
                        title.categories.append(category)

        """ for actresses """
        if item['actresses']:
            for a in item['actresses']:
                """ ev_actress should be in the ev_actress table 
                    because it was already inserted in _get_ev_title 
                    if it's not existed (ev_actress_image too) """
                ev_actress = session.query(self._ev_actress_cls).\
                    filter_by(name=a).\
                    one()

                """ Do nothing if there is the actress in actress_table """

                try:
                    actress = session.query(self._actress_cls).\
                        join(self._ev_actress_cls).\
                        filter(self._ev_actress_cls.name==a).\
                        one()
                except NoResultFound:
                    # First time this actress is seen: create her row and
                    # link it to the ev_actress fetched above.
                    actress = self._actress_cls()
                    actress.ev_actress = ev_actress

                    """ Add images only when actress does not exist """
                    if item['images']['face']:
                        for image in item['images']['face']:
                            """ Doesn't need to be checked if ev_actress_image exists
                                if image is not in actress.images_ap 
                                because relationship for actress and actress_image is one to many """
                            try:
                                ev_actress_image = session.query(self._ev_actress_image_cls).\
                                    filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                                    filter_by(name=image['name']).\
                                    one()
                            except NoResultFound:
                                # No image with this name: fall back to any
                                # image stored for the same ev_actress.
                                # NOTE(review): first() returns None when the
                                # ev_actress has no image rows at all, and the
                                # attribute access below would then fail --
                                # confirm that case cannot occur.
                                ev_actress_image = session.query(self._ev_actress_image_cls).\
                                    filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                                    first()

                            # Reuse the actress image already linked to this
                            # ev_actress_image, or create one.
                            if not ev_actress_image.site_actress_image:
                                """ When
                                """
                                actress_image = self._actress_image_cls()
                                actress_image.ev_actress_image = ev_actress_image
                            else:
                                actress_image = ev_actress_image.site_actress_image

                            actress.images.append(actress_image)

                    """ Not sure if the following "add" is needed """
                    session.add(actress)

                if not a in title.actresses_ap:
                    title.actresses.append(actress)

        """ for title_images """
        for image_type, images in item['images'].iteritems():
            if images:
                for image in images:
                    if not image['name'] in title.images_ap:
                        # The matching ev_title_image row must already exist.
                        ev_title_image = session.query(self._ev_title_image_cls).\
                                            filter_by(ev_title_id=ev_title.ev_title_id).\
                                            filter_by(name=image['name']).\
                                            filter_by(image_type=image_type).\
                                            one()

                        title_image = self._title_image_cls()
                        title_image.ev_title_image = ev_title_image
                        title.images.append(title_image)

        session.add(title)
        session.commit()
    def _get_ev_title(self, item):
        """Fetch or create the ev_title for ``item`` and link its related rows.

        The ev_title is looked up by ``item['original_id']`` and built from
        ``item.to_ev_title()`` when missing.  Categories, actresses and
        images from ``item`` that the ev_title does not have yet are then
        appended, and the ev_title is added to the session and committed.

        Category lookup order: exact name; for names longer than one
        character, the name with its kana converted in either direction;
        then the first row (by ``ev_category_id``) whose name contains any
        of those spellings.  Categories with no match are skipped.

        Face images from ``item['images']['face']`` are attached only to
        actresses created by this call.

        Returns:
            The ev_title instance after the commit.
        """
        ev_title = session.query(self._ev_title_cls).\
                        filter_by(original_id=item['original_id']).\
                        first()
        if not ev_title:
            ev_title = self._ev_title_cls(**(item.to_ev_title()))

        # --- categories ---
        if item['categories']:
            for category_name in item['categories']:
                katakana_name = cnvk.convert(category_name, cnvk.HIRA2KATA)
                hiragana_name = cnvk.convert(category_name, cnvk.KATA2HIRA)

                # Do nothing when ev_title already has the category under
                # any kana spelling.  (All three spellings must be absent
                # before a lookup is worthwhile.)
                if (category_name in ev_title.categories_ap
                        or katakana_name in ev_title.categories_ap
                        or hiragana_name in ev_title.categories_ap):
                    continue

                try:
                    ev_category = session.query(self._ev_category_cls).\
                        filter_by(name=category_name).\
                        one()
                except NoResultFound:
                    # A one-character name without an exact match is
                    # treated as absent from the database; the fuzzy
                    # fallbacks below apply to longer names only.
                    ev_category = None
                    if len(category_name) != 1:
                        # The same word may be stored with different kana,
                        # such as "生はめ" and "生ハメ".
                        ev_category = session.query(self._ev_category_cls).\
                            filter((self._ev_category_cls.name == katakana_name) |
                                   (self._ev_category_cls.name == hiragana_name)).\
                            first()

                        if not ev_category:
                            # Last resort: substring match on any spelling,
                            # taking the lowest ev_category_id.
                            ev_category = session.query(self._ev_category_cls).\
                                filter((self._ev_category_cls.name.contains(category_name)) |
                                       (self._ev_category_cls.name.contains(katakana_name)) |
                                       (self._ev_category_cls.name.contains(hiragana_name))).\
                                order_by(self._ev_category_cls.ev_category_id).\
                                first()

                # A substring match can resolve to a category whose name
                # differs from category_name, so also guard on the row
                # itself to avoid appending it twice.
                if ev_category and ev_category not in ev_title.categories:
                    ev_title.categories.append(ev_category)

        # --- actresses ---
        if item['actresses']:
            for actress_name in item['actresses']:
                try:
                    ev_actress = session.query(self._ev_actress_cls).\
                        filter_by(name=actress_name).\
                        one()
                except NoResultFound:
                    ev_actress = self._ev_actress_cls(name=actress_name)

                    # Face images are attached only to a newly created
                    # actress.
                    if item['images']['face']:
                        for image in item['images']['face']:
                            ev_actress_image = self._ev_actress_image_cls(name=image['name'],
                                                                          original_url=image['original_url'])
                            ev_actress.images.append(ev_actress_image)

                # NOTE(review): the original author was unsure whether this
                # add is required; it is kept as is.
                session.add(ev_actress)

                if actress_name not in ev_title.actresses_ap:
                    ev_title.actresses.append(ev_actress)

        # --- title images ---
        for image_type, images in item['images'].iteritems():
            if images:
                for image in images:
                    # Do nothing if ev_title already has the image.
                    if image['name'] not in ev_title.images_ap:
                        ev_title_image = self._ev_title_image_cls(name=image['name'],
                                                                  original_url=image['original_url'],
                                                                  image_type=image_type)
                        ev_title.images.append(ev_title_image)

        session.add(ev_title)
        session.commit()

        return ev_title
Ejemplo n.º 10
0
    def _to_sqlalchemy(self, item):
        #ev_title = self._get_ev_title(item)
        #from erovideo.models.x1x.x1x_title import X1XJPTitle
        #title = session.query(self._title_cls).\
        #    join(self._ev_title_cls).\
        #    filter(self._ev_title_cls.original_id==item['original_id']).\
        #    first()

        #if not title:
            #title = self._title_cls(**self._to_insert(item))
        #    title = self._title_cls(**item.to_title())
            #title = self._set_relationships(item, title)

        try:
            title = session.query(self._title_cls).\
                join(self._ev_title_cls).\
                filter(self._ev_title_cls.original_id==item['original_id']).\
                one()
        except NoResultFound:
            title = self._title_cls(**item.to_title())

        title = self._set_relationships(item, title)

        ev_title = self._get_ev_title(item)
        try:
            title.ev_title = ev_title
        except Exception as e:
            print '### debug ###'
            print title.site_id
            print item['original_id']
            print self._title_cls
            print '### debug ###'
            raise e

        
        """ for categories """
        if item['categories']:
            for c in item['categories']:
                """ Do nothing if title already has the category """
                if not c in title.categories_ap:
                #if not c in title.categories_ap or not cnvk.convert(c, cnvk.HIRA2KATA) in title.categories_ap or not cnvk.convert(c, cnvk.KATA2HIRA):
                    category = session.query(self._category_cls).\
                                    filter_by(name=c).\
                                    first()
                    
                    if not category:
                        category = session.query(self._category_cls).\
                                         filter((self._category_cls.name==cnvk.convert(c, cnvk.HIRA2KATA)) | \
                                                (self._category_cls.name==cnvk.convert(c, cnvk.KATA2HIRA))).\
                                         first()
                        
                        #if not category:
                        #    raise NoResultFound
                
                    if category:
                        title.categories.append(category)
                    #title.categories.append(category)
        
        """ for actresses """
        if item['actresses']:
            for a in item['actresses']:
                """ ev_actress should be in the ev_actress table 
                    because it was already inserted in _get_ev_title 
                    if it's not existed (ev_actress_image too)
                """
                ev_actress = session.query(self._ev_actress_cls).\
                    filter_by(name=a).\
                    one()
                                        
                #""" Do nothing if there is the actress in actress_table """
                """ Need to check actress and actress_image before checking if the title has them
                because actress might not have the images.
                actressがface画像を持っていない場合の事を考えて、
                初めにactressとactress_imageをチェックしてから
                titleオブジェクトがactressを持っているか否を確認する。
                """
                try:
                    actress = session.query(self._actress_cls).\
                        join(self._ev_actress_cls).\
                        filter(self._ev_actress_cls.name==a).\
                        one()
                except NoResultFound:
                    actress = self._actress_cls()
                    actress.ev_actress = ev_actress
                    
                    #""" Add images only when actress does not exist """
                    #if 'face' in item['images']:
#                if item['images']['face']:
#                    for image in item['images']['face']:
#                        if not image['name'] in actress.images_ap:
#                            """ Doesn't need to be checked if ev_actress_image exists
#                            if image is not in actress.images_ap
#                            because relationship for actress and actress_image is one to many """
#                            try:
#                                ev_actress_image = session.query(self._ev_actress_image_cls).\
#                                    filter_by(ev_actress_id=ev_actress.ev_actress_id).\
#                                    filter_by(name=image['name']).\
#                                    one()
#                            except NoResultFound:
#                                ev_actress_image = session.query(self._ev_actress_image_cls).\
#                                    filter_by(ev_actress_id=ev_actress.ev_actress_id).\
#                                    first()

#                            actress_image = self._actress_image_cls()
#                            actress_image.ev_actress_image = ev_actress_image

#                            actress.images.append(actress_image)
                        
#                    """ Not sure if the following "add" is needed
#                    => Needed to commit actress object
#                    """
#                    session.add(actress)
                if item['images']['face']:
                    for image in item['images']['face']:
                        if not image['name'] in actress.images_ap:
                            """ ev_actress_image should be one
                            because it's created in _get_ev_title()
                            """
                            ev_actress_image = session.query(self._ev_actress_image_cls).\
                                filter_by(ev_actress_id=ev_actress.ev_actress_id).\
                                filter_by(name=image['name']).\
                                one()

                            actress_image = self._actress_image_cls()
                            actress_image.ev_actress_image = ev_actress_image
                            actress.images.append(actress_image)
                        
                            """ Not sure if the following "add" is needed
                            => Needed to commit actress object
                            """
                            session.add(actress)
                
                if not a in title.actresses_ap:
                    title.actresses.append(actress)
        
        """ for title_images """    
        for image_type, images in item['images'].iteritems():
            if images:
                for image in images:
                    if not image['name'] in title.images_ap:
                        ev_title_image = session.query(self._ev_title_image_cls).\
                                            filter_by(ev_title_id=ev_title.ev_title_id).\
                                            filter_by(name=image['name']).\
                                            filter_by(image_type=image_type).\
                                            one()
                
                        title_image = self._title_image_cls()
                        title_image.ev_title_image = ev_title_image
                        title.images.append(title_image)
                    
        session.add(title)
        session.commit()
Ejemplo n.º 11
0
    def _get_ev_title(self, item):
        """Fetch or create the ev_title row for ``item``, attach its children, and commit.

        The ev_title is looked up by ``item['original_id']`` together with
        ``item['original_url']``; when no row matches, a new instance is built
        from ``item.to_ev_title()``.  Categories, actresses (with their face
        images) and title images named by ``item`` are then attached if they
        are not already present, and the result is added to the module-level
        ``session`` and committed.

        Args:
            item: mapping-like object read through the keys ``'original_id'``,
                ``'original_url'``, ``'categories'``, ``'actresses'`` and
                ``'images'``; it must also provide ``to_ev_title()``.
                ``item['images']`` is iterated as ``{image_type: [image, ...]}``
                and indexed with ``'face'``; every image is read through its
                ``'name'`` and ``'original_url'`` keys.

        Returns:
            The ev_title instance, after ``session.commit()``.

        Raises:
            Only ``NoResultFound`` from the ``.one()`` lookups is handled here;
            any other exception raised by the queries propagates to the caller.
        """
        try:
            # original_id may be identical on another site, so original_url is
            # used as a second filter.
            ev_title = session.query(self._ev_title_cls).\
                filter_by(original_id=item['original_id']).\
                filter_by(original_url=item['original_url']).\
                one()
        except NoResultFound:
            ev_title = self._ev_title_cls(**(item.to_ev_title()))

        # ---- categories ----
        if item['categories']:
            for category_name in item['categories']:
                # The same word may be stored with different kana
                # (e.g. "生はめ" vs "生ハメ"), so both conversions are considered.
                katakana_name = cnvk.convert(category_name, cnvk.HIRA2KATA)
                hiragana_name = cnvk.convert(category_name, cnvk.KATA2HIRA)

                # Do nothing if ev_title already has the category under any of
                # its kana spellings.  (The checks used to be joined so that the
                # category was re-appended unless *every* spelling was present.)
                if category_name in ev_title.categories_ap \
                        or katakana_name in ev_title.categories_ap \
                        or hiragana_name in ev_title.categories_ap:
                    continue

                try:
                    ev_category = session.query(self._ev_category_cls).\
                        filter_by(name=category_name).\
                        one()
                except NoResultFound:
                    if len(category_name) != 1:
                        # Exact match on either kana spelling.  This must compare
                        # the category *name column*; comparing the actress class
                        # object to a string could never select a row.
                        ev_category = session.query(self._ev_category_cls).\
                            filter((self._ev_category_cls.name == katakana_name) |
                                   (self._ev_category_cls.name == hiragana_name)).\
                            first()

                        if not ev_category:
                            # Last resort: substring match on any spelling,
                            # taking the row with the lowest ev_category_id.
                            ev_category = session.query(self._ev_category_cls).\
                                filter((self._ev_category_cls.name.contains(category_name)) |
                                       (self._ev_category_cls.name.contains(katakana_name)) |
                                       (self._ev_category_cls.name.contains(hiragana_name))).\
                                order_by(self._ev_category_cls.ev_category_id).\
                                first()
                    else:
                        # A one-character name that has no exact match is
                        # treated as absent from the database (no fuzzy lookup).
                        ev_category = None

                # Categories that cannot be resolved are silently skipped.
                if ev_category:
                    ev_title.categories.append(ev_category)

        # ---- actresses ----
        if item['actresses']:
            for actress_name in item['actresses']:
                # ev_actress and its images are handled before checking whether
                # ev_title already references the actress, because an existing
                # ev_actress might not have face images yet.
                try:
                    ev_actress = session.query(self._ev_actress_cls).\
                        filter_by(name=actress_name).\
                        one()
                except NoResultFound:
                    ev_actress = self._ev_actress_cls(name=actress_name)

                # Face images are added every time a title is processed, not
                # only when the actress is first created.
                # NOTE(review): the original author states ev_actress.images is
                # ordered newest-first by creation time -- not verifiable here.
                if item['images']['face']:
                    for image in item['images']['face']:
                        if not image['name'] in ev_actress.images_ap:
                            ev_actress_image = self._ev_actress_image_cls(
                                name=image['name'],
                                original_url=image['original_url'])
                            ev_actress.images.append(ev_actress_image)

                            # Required: adding ev_actress to the session makes
                            # the next commit() persist it even when the
                            # actress is not appended to ev_title below.
                            session.add(ev_actress)

                if not actress_name in ev_title.actresses_ap:
                    ev_title.actresses.append(ev_actress)

        # ---- title images ----
        for image_type, images in item['images'].iteritems():
            if images:
                for image in images:
                    # Do nothing if ev_title already has the image.
                    if not image['name'] in ev_title.images_ap:
                        ev_title_image = self._ev_title_image_cls(
                            name=image['name'],
                            original_url=image['original_url'],
                            image_type=image_type)
                        ev_title.images.append(ev_title_image)

        session.add(ev_title)
        session.commit()

        return ev_title