def search_film(film_title=None, year=None, imdb_id=None, criticker_id=None, filmweb_id=None):
    """
    Search for a film while importing ratings.

    When ``imdb_id`` is given, the ``Film`` with that ``imdb_code`` is looked
    up and returned if its normalized title equals the normalized
    ``film_title`` and ``year`` is either falsy or equal to the film's
    ``release_year``.  If that check fails, the movie is fetched through
    ``imdb_fetcher`` and the film is still returned when the fetched year
    equals ``year`` and ``film_title`` equals the title part of one of the
    fetched ``akas`` entries.

    Any exception raised in the ``imdb_id`` branch is logged and swallowed.

    NOTE(review): ``criticker_id`` and ``filmweb_id`` are not used in the
    portion of the function visible here; the function presumably continues
    beyond this point -- confirm against the full source.
    """
    from film20.utils.texts import normalized_text
    title_normalized = normalized_text(film_title)
    if imdb_id:
        try:
            film = Film.objects.get(imdb_code=imdb_id)
            if normalized_text(film.title)==title_normalized and (not year or year==film.release_year):
                return film
            else:
                logger.debug("WARN: not matching film! searching for: #%s %s (%s); found %s (%s)" %(imdb_id, film_title.encode('utf-8'), year, film.title.encode('utf-8'), film.release_year))
                # fix for http://jira.filmaster.org/browse/FLM-491
                # fetch movie by this imdb_code and check if year is same
                # and title is in akas then return this film
                movie = imdb_fetcher.get_movie_by_id( imdb_id, "http" )
                if movie:
                    if movie.get( 'year' ) == year:
                        akas = movie.get( 'akas' )
                        for aka in akas:
                            # each aka is split into exactly two parts on '::';
                            # the first part is compared with the searched title
                            t, c = aka.split( '::' )
                            if t == film_title:
                                print " -- title is: %s" % c
                                return film
                    # NOTE(review): nesting was reconstructed from a
                    # whitespace-collapsed source; this ``else`` is assumed to
                    # pair with the year comparison -- confirm.
                    else:
                        logger.error("ERROR: this imdb_code is probably wrong ...")
        # broad catch: lookup failures (including a missing Film) and
        # fetch/parsing errors are only logged
        except Exception, e:
            logger.error("ERROR: %s" % e)
def prepare_title( self, obj ): result = [] # 1. add actor full_name = "%s %s" % ( obj.name, obj.surname ) result.append( full_name ) # ... and normalized normalized = normalized_text( full_name ) if normalized and not normalized in result: result.append( normalized ) # 2. add actor in current locale try: person = PersonLocalized.objects.get( person=obj, LANG=settings.LANGUAGE_CODE ) full_name = "%s %s" % ( person.name, person.surname ) if not full_name in result: result.append( full_name ) # ... and normalized normalized = normalized_text( full_name ) if normalized and not normalized in result: result.append( normalized ) except PersonLocalized.DoesNotExist: pass except Exception, e: print "Exception on person localized: ", e
def filter_films():
    """
    Distribute the films from ``all_results`` over the ``exact``,
    ``normalized`` and ``fuzzy`` lists of the enclosing scope.

    A film is appended to a list at most once:

    * ``exact``      -- lower-cased title equals ``title_lower``
    * ``normalized`` -- normalized title equals ``title_normalized``
    * ``fuzzy``      -- normalized title contains, or is contained in,
      ``title_normalized`` (substring test, not a prefix test)

    The film's own title is tried first.  Localized titles are consulted only
    when the film's own title was not an exact match.
    """
    for film in all_results:
        is_exact = film.title.lower() == title_lower
        if is_exact:
            exact.append(film)

        film_norm = normalized_text(film.title)
        is_normalized = film_norm == title_normalized
        if is_normalized:
            normalized.append(film)

        is_fuzzy = film_norm in title_normalized or title_normalized in film_norm
        if is_fuzzy:
            fuzzy.append(film)

        # an exact hit on the main title makes the localized lookup unnecessary
        if is_exact:
            continue

        for localized in FilmLocalized.objects.filter(film=film.id):
            if not is_exact and localized.title.lower() == title_lower:
                exact.append(film)
                is_exact = True

            localized_norm = normalized_text(localized.title)
            if not is_normalized and localized_norm == title_normalized:
                normalized.append(film)
                is_normalized = True

            if not is_fuzzy and (localized_norm in title_normalized or title_normalized in localized_norm):
                fuzzy.append(film)
                is_fuzzy = True
def match_and_save(self):
    """
    Try to match this object to a ``Film``, starting with its ``imdb_code``.

    When ``self.imdb_code`` is set and a ``Film`` with that code exists, it is
    taken as the match.  Otherwise a ``FilmToImport`` entry with the comment
    'showtimes auto-import' is fetched or built for that code, and an entry in
    ``ACCEPTED`` status has ``is_imported`` and ``attempts`` reset.

    NOTE(review): ``title_norm``, ``year``, ``directors``, ``tag`` and
    ``candidates`` are prepared but not used in the visible portion; the
    function presumably continues beyond this point -- confirm against the
    full source.
    """
    title = self.title
    # fall back to the raw title when normalization yields a falsy value
    title_norm = normalized_text(title) or title
    year = self.year
    # generator expression: can be iterated only once
    directors = (d.strip() for d in (self.directors or '').split(','))
    tag = self.tag
    film = None
    candidates = []
    match = UNMATCHED
    if self.imdb_code:
        try:
            film = Film.objects.get(imdb_code=self.imdb_code)
            logger.debug("matched by imdb_code: %r", film)
            match = MATCHED
        except Film.DoesNotExist, e:
            # film not in the database yet: find or build an import request
            admin = User.objects.get(username='******')
            try:
                to_import = FilmToImport.objects.get(imdb_id=self.imdb_code, comment='showtimes auto-import', user=admin)
            except FilmToImport.DoesNotExist:
                to_import = FilmToImport(
                    user = admin,
                    title = self.title,
                    imdb_url = 'http://www.imdb.com/title/tt/%s/' % self.imdb_code,
                    imdb_id = self.imdb_code,
                    comment = 'showtimes auto-import',
                    status = FilmToImport.ACCEPTED,
                )
            # NOTE(review): nesting was reconstructed from a
            # whitespace-collapsed source; the status check is assumed to run
            # for both the fetched and the newly built entry, with ``save()``
            # inside it -- confirm.
            if to_import.status == FilmToImport.ACCEPTED:
                to_import.is_imported = False
                to_import.attempts = 0
                to_import.save()
def match(film, result, fuzzy=False):
    """
    Compare ``film`` with a ``result`` mapping that has 'released' and 'name' keys.

    Returns ``False`` when the normalized result name differs from
    ``film.title_normalized``, when either the release date or the film's
    ``release_year`` is missing, or when no integer year can be parsed from
    the part of 'released' before the first '-'.

    NOTE(review): ``fuzzy`` and the parsed ``year`` are not used in the
    visible portion; the function presumably continues beyond this point --
    confirm against the full source.
    """
    # treat missing (falsy) values as empty strings
    prod_date = result['released'] or ''
    name = result['name'] or ''
    # fall back to the raw name when normalization yields a falsy value
    title_norm = normalized_text(name) or name
    if title_norm != film.title_normalized:
        return False
    if not prod_date or not film.release_year:
        return False
    try:
        # the year is taken from the text before the first '-'
        year = int(prod_date.split('-')[0])
    except (IndexError, ValueError), e:
        return False
def save(self, *args, **kw):
    """
    Refresh derived fields and delegate to the parent ``save``.

    ``name_normalized`` is always recomputed from ``name``.  For cinemas the
    country is copied from the town, and a missing ``timezone_id`` is filled
    in from the town or, failing that, from a ``timezone`` lookup on the
    coordinates; a looked-up value is also stored on the town, which is
    saved.  A warning is logged when no coordinates are available.
    """
    assert self.type in (TYPE_CINEMA, TYPE_TV_CHANNEL)
    self.name_normalized = normalized_text(self.name)

    if self.type == self.TYPE_CINEMA:
        town = self.town
        self.country = town.country
        if not self.timezone_id:
            if town.timezone_id:
                # the town already knows its timezone
                self.timezone_id = town.timezone_id
            elif self.latitude and self.longitude:
                tz_info = timezone(self.latitude, self.longitude)
                self.timezone_id = tz_info['timezoneId']
                # remember the looked-up value on the town as well
                town.timezone_id = self.timezone_id
                town.save()
            else:
                logger.warning("%s without coords", unicode(self))

    return super(Channel, self).save(*args, **kw)
def save(self, *args, **kw):
    """
    Refresh derived fields and delegate to the parent ``save``.

    ``name_normalized`` is always recomputed from ``name``.  For cinemas the
    country is copied from the town, and a missing ``timezone_id`` is filled
    in from the town or, failing that, from a ``timezone`` lookup on the
    coordinates; a looked-up value is also stored on the town, which is
    saved.  A warning is logged when no coordinates are available.
    """
    assert self.type in (TYPE_CINEMA, TYPE_TV_CHANNEL)
    self.name_normalized = normalized_text(self.name)

    if self.type == self.TYPE_CINEMA:
        town = self.town
        self.country = town.country
        if not self.timezone_id:
            if town.timezone_id:
                # the town already knows its timezone
                self.timezone_id = town.timezone_id
            elif self.latitude and self.longitude:
                tz_info = timezone(self.latitude, self.longitude)
                self.timezone_id = tz_info['timezoneId']
                # remember the looked-up value on the town as well
                town.timezone_id = self.timezone_id
                town.save()
            else:
                logger.warning("%s without coords", unicode(self))

    return super(Channel, self).save(*args, **kw)
else: LIMIT = 10 query = Film.objects.all().distinct().order_by('-popularity') if year: query = query.filter(release_year__gte=int(year)-1, release_year__lte=int(year)+1) if tag: query = query.tagged(tag) q1 = Q(title_normalized=title_norm) q2 = Q(filmlocalized__title_normalized=title_norm, filmlocalized__LANG=settings.LANGUAGE_CODE) films = query.filter(q1|q2)[:LIMIT] dset = set(normalized_text(d) for d in directors) def directors_match(film): film_directors = set() for d in film.directors.all(): film_directors.add(normalized_text(unicode(d))) film_directors.add(normalized_text(d.format())) return film_directors & dset dfilms = filter(directors_match, films) dmatched = bool(dfilms) if dmatched: films = dfilms film = len(films)==1 and films[0] or None candidates = len(films) > 1 and films or [] match = film and MATCHED or candidates and HAS_CANDIDATES or UNMATCHED elif tag and len(films) == 1:
def letters(value):
    """Return ``text_letters`` applied to the ``normalized_text`` form of ``value``."""
    normalized = normalized_text(value)
    return text_letters(normalized)
def normalized_title(t):
    """
    Return the ``normalized_text`` form of a title.

    When ``title_re`` matches ``t`` and its first group is non-empty, that
    group is normalized; otherwise the whole of ``t`` is normalized.
    """
    found = title_re.match(t)
    captured = found.group(1) if found else None
    # an empty or missing capture falls back to the full input
    return normalized_text(captured or t)
def normalize(value):
    """Return the ``normalized_text`` form of ``value``."""
    result = normalized_text(value)
    return result
def root_normalize(value):
    """Return ``text_root_normalized`` applied to the ``normalized_text`` form of ``value``."""
    normalized = normalized_text(value)
    return text_root_normalized(normalized)
def directors_match(film):
    """
    Return the normalized director names of ``film`` that also occur in ``dset``.

    Each director contributes two normalized spellings: its ``unicode()``
    text and the value of its ``format()`` method.  The result is a set, so
    it is falsy when no director matches.
    """
    names = set()
    for director in film.directors.all():
        names.update((
            normalized_text(unicode(director)),
            normalized_text(director.format()),
        ))
    return names & dset