def safeMessage(self, msg, replace_tuple = ()):
    """Interpolate *replace_tuple* into *msg*, safely byte-encode everything and
    scrub private data (configured query params, the api key) before logging.

    :param msg: format string for the log line
    :param replace_tuple: tuple, dict or single value used for %-interpolation
    :return: unicode version of the scrubbed message
    """

    from whatpotato.core.helpers.encoding import ss, toUnicode

    msg = ss(msg)

    try:
        # Encode every non-numeric interpolation value to avoid UnicodeErrors in %-formatting
        if isinstance(replace_tuple, tuple):
            msg = msg % tuple([ss(x) if not isinstance(x, (int, float)) else x for x in list(replace_tuple)])
        elif isinstance(replace_tuple, dict):
            msg = msg % dict((k, ss(v) if not isinstance(v, (int, float)) else v) for k, v in replace_tuple.iteritems())
        else:
            msg = msg % ss(replace_tuple)
    except Exception as e:
        # Formatting failed: log the raw message instead of crashing the logger
        self.logger.error('Failed encoding stuff to log "%s": %s' % (msg, e))

    self.setup()
    if not self.is_develop:

        # Mask the values of private query-string parameters (e.g. ?apikey=...)
        for replace in self.replace_private:
            msg = re.sub('(\?%s=)[^\&]+' % replace, '?%s=xxx' % replace, msg)
            msg = re.sub('(&%s=)[^\&]+' % replace, '&%s=xxx' % replace, msg)

        # Replace api key
        try:
            api_key = self.Env.setting('api_key')
            if api_key:
                msg = msg.replace(api_key, 'API_KEY')
        except:
            # Best effort only: settings may not be available yet during startup
            pass

    return toUnicode(msg)
def getFromURL(self, url):
    """Scrape an IMDB list page at *url* and return the IMDB ids found on it.

    :param url: IMDB list/chart page url
    :return: list of imdb id strings (empty on parse failure)
    """
    log.debug('Getting IMDBs from: %s', url)
    html = self.getHTMLData(url)

    try:
        # Fast path: slice the compact-list section out with plain string splits
        split = splitString(html, split_on = "<div class=\"list compact\">")[1]
        html = splitString(split, split_on = "<div class=\"pages\">")[0]
    except:
        try:
            # Fallback: parse the main content block and search known list containers
            split = splitString(html, split_on = "<div id=\"main\">")
            if len(split) < 2:
                log.error('Failed parsing IMDB page "%s", unexpected html.', url)
                return []

            html = BeautifulSoup(split[1])
            for x in ['list compact', 'lister', 'list detail sub-list']:
                html2 = html.find('div', attrs = { 'class': x })

                if html2:
                    html = html2.contents
                    html = ''.join([str(x) for x in html])
                    break
        except:
            log.error('Failed parsing IMDB page "%s": %s', (url, traceback.format_exc()))

    html = ss(html)
    # Extract all ids from whatever html fragment survived the parsing above
    imdbs = getImdb(html, multiple = True) if html else []

    return imdbs
def validate(self, name = None):
    """Validate *name* against the provider's validate endpoint.

    Returns the parsed JSON response, or None when no name was given.
    """
    if name:
        encoded = base64.b64encode(ss(name))
        return self.getJsonData(self.urls['validate'] % encoded, headers = self.getRequestHeaders())
def getMeta(self, filename):
    """Read container metadata from *filename* via enzyme.

    :param filename: path of the media file
    :return: dict with titles/video/audio/resolution/channels, or {} when parsing fails
    """

    try:
        p = enzyme.parse(filename)

        # Video codec: normalise enzyme's 'AVC1' fourcc to the quality label 'H264'
        vc = ('H264' if p.video[0].codec == 'AVC1' else p.video[0].codec)

        # Audio codec: map raw codec id to a friendly name when known
        ac = p.audio[0].codec
        try:
            ac = self.audio_codec_map.get(p.audio[0].codec)
        except:
            pass

        # Find title in video headers
        titles = []

        try:
            if p.title and self.findYear(p.title):
                titles.append(ss(p.title))
        except:
            log.error('Failed getting title from meta: %s', traceback.format_exc())

        # Also collect per-stream titles that look like "Name (Year)"
        for video in p.video:
            try:
                if video.title and self.findYear(video.title):
                    titles.append(ss(video.title))
            except:
                log.error('Failed getting title from meta: %s', traceback.format_exc())

        return {
            'titles': list(set(titles)),  # de-duplicated
            'video': vc,
            'audio': ac,
            'resolution_width': tryInt(p.video[0].width),
            'resolution_height': tryInt(p.video[0].height),
            'audio_channels': p.audio[0].channels,
        }
    except enzyme.exceptions.ParseError:
        log.debug('Failed to parse meta for %s', filename)
    except enzyme.exceptions.NoParserError:
        log.debug('No parser found for %s', filename)
    except:
        log.debug('Failed parsing %s', filename)

    return {}
def getMultImages(self, movie, type = 'backdrops', size = 'original'):
    """Collect extra image urls (list indexes 1-4) of the given *type* for *movie*.

    Best effort: on failure, whatever was collected so far is returned.
    """
    urls = []

    try:
        for img in movie.get('images', {}).get(type, [])[1:5]:
            urls.append(self.getImage(img, 'file', size))
    except:
        log.debug('Failed getting %s.%s for "%s"', (type, size, ss(str(movie))))

    return urls
def containsTagScore(self, quality, words, cur_file = ''):
    """Score how strongly the filename tokens match a quality definition's tags.

    :param quality: quality dict with identifier/label/alternative/tags/ext entries
    :param words: filename tokens; the LAST element is the file extension
    :param cur_file: original filename, for logging only
    :return: accumulated float score
    """
    cur_file = ss(cur_file)
    score = 0.0

    # Split off the extension token from the name words
    extension = words[-1]
    words = words[:-1]

    # Points awarded per tag category
    points = {
        'identifier': 25,
        'label': 25,
        'alternative': 20,
        'tags': 11,
        'ext': 5,
    }

    scored_on = []

    # Check alt and tags
    for tag_type in ['identifier', 'alternative', 'tags', 'label']:
        qualities = quality.get(tag_type, [])
        # Single string tags are wrapped so we can iterate uniformly
        qualities = [qualities] if isinstance(qualities, (str, unicode)) else qualities
        for alt in qualities:
            # Tuple tags require every element to be present among the words
            if isinstance(alt, tuple):
                if len(set(words) & set(alt)) == len(alt):
                    log.debug('Found %s via %s %s in %s', (quality['identifier'], tag_type, quality.get(tag_type), cur_file))
                    score += points.get(tag_type)

            # NOTE(review): only string tags are tracked in scored_on; tuple matches
            # above are never recorded and could score more than once — confirm intended
            if isinstance(alt, (str, unicode)) and ss(alt.lower()) in words and ss(alt.lower()) not in scored_on:
                log.debug('Found %s via %s %s in %s', (quality['identifier'], tag_type, quality.get(tag_type), cur_file))
                score += points.get(tag_type)

                # Don't score twice on same tag
                scored_on.append(ss(alt).lower())

    # Check extension
    for ext in quality.get('ext', []):
        if ext == extension:
            log.debug('Found %s with .%s extension in %s', (quality['identifier'], ext, cur_file))
            score += points['ext']

    return score
def getImage(self, movie, type = 'poster', size = 'poster'):
    """Build the full image url of the given *type* and *size* for *movie*.

    Returns an empty string when the path is missing or lookup fails.
    """
    url = ''

    try:
        img_path = movie.get('%s_path' % type)
        if img_path:
            base = self.configuration['images']['secure_base_url']
            url = '%s%s%s' % (base, size, img_path)
    except:
        log.debug('Failed getting %s.%s for "%s"', (type, size, ss(str(movie))))

    return url
def get3dType(self, filename):
    """Return the 3D type key whose tags match *filename*, or '' when none do."""
    filename = ss(filename)

    tokens = re.split('\W+', filename.lower())
    joined_tokens = '.'.join(tokens)

    for threed_key in self.threed_types:
        for tag in self.threed_types.get(threed_key, []):
            # Tuple tags match as a dotted phrase; string tags match a single token
            if isinstance(tag, tuple):
                matched = '.'.join(tag) in joined_tokens
            else:
                matched = isinstance(tag, (str, unicode)) and ss(tag.lower()) in tokens

            if matched:
                log.debug('Found %s in %s', (tag, filename))
                return threed_key

    return ''
def contains3D(self, quality, words, cur_file = ''):
    """Look for 3D markers among *words*.

    :return: (1, tag_key) on the first match, otherwise (0, None)
    """
    cur_file = ss(cur_file)
    word_set = set(words)

    for tag_key in self.threed_tags:
        for marker in self.threed_tags.get(tag_key, []):
            # Tuple markers need every element present; plain markers need one token
            if isinstance(marker, tuple):
                matched = len(word_set & set(marker)) == len(marker)
            else:
                matched = marker in words

            if matched:
                log.debug('Found %s in %s', (marker, cur_file))
                return 1, tag_key

    return 0, None
def getRSSData(self, url, item_path = 'channel/item', **kwargs):
    """Fetch *url* (through the cache) and return the XML elements at *item_path*.

    Retries the parse through the byte-safe encoder when the raw payload fails;
    returns an empty list on any failure.

    :param url: feed url
    :param item_path: ElementTree path of the items to extract
    :return: list of matching XML elements
    """
    cache_key = md5(url)
    data = self.getCache(cache_key, url, **kwargs)

    if data and len(data) > 0:
        try:
            data = XMLTree.fromstring(data)
            return self.getElements(data, item_path)
        except:
            try:
                # Retry after forcing the payload through the safe-string encoder
                data = XMLTree.fromstring(ss(data))
                return self.getElements(data, item_path)
            except XmlParseError:
                log.error('Invalid XML returned, check "%s" manually for issues', url)
            except:
                # Fix: log message wording was 'Failed to parsing'
                log.error('Failed parsing %s: %s', (self.getName(), traceback.format_exc()))

    return []
def download(self, url = '', dest = None, overwrite = False, urlopen_kwargs = None):
    """Download *url* to *dest* (defaults to the cache dir).

    :return: destination path on success (or when already present), False on failure
    """
    urlopen_kwargs = urlopen_kwargs or {}

    # Stream the response body instead of buffering it all at once
    urlopen_kwargs['stream'] = True

    if not dest:
        # Default destination: cache dir, named by url hash plus original extension
        cache_name = ss('%s.%s' % (md5(url), getExt(url)))
        dest = os.path.join(Env.get('cache_dir'), cache_name)

    dest = sp(dest)

    # Reuse an existing file unless asked to overwrite
    if os.path.isfile(dest) and not overwrite:
        return dest

    try:
        filedata = self.urlopen(url, **urlopen_kwargs)
    except:
        log.error('Failed downloading file %s: %s', (url, traceback.format_exc()))
        return False

    self.createFile(dest, filedata, binary = True)
    return dest
def getImdb(txt, check_inside = False, multiple = False):
    """Extract IMDB id(s) of the form 'tt0123456' from *txt*.

    :param txt: text to search, or a file path when check_inside is True
    :param check_inside: treat *txt* as a possible file path and read its contents
    :param multiple: return every unique id found instead of just the first
    :return: id string, list of ids (when multiple), or False when nothing matches
    """
    if not check_inside:
        txt = simplifyString(txt)
    else:
        txt = ss(txt)

    if check_inside and os.path.isfile(txt):
        # Fix: context manager closes the handle even if read() raises
        with open(txt, 'r') as output:
            txt = output.read()

    try:
        # Raw string for the regex; ids are normalised to the 7-digit 'tt' form
        ids = re.findall(r'(tt\d{4,7})', txt)

        if multiple:
            return removeDuplicate(['tt%07d' % tryInt(x[2:]) for x in ids]) if len(ids) > 0 else []

        return 'tt%07d' % tryInt(ids[0][2:])
    except IndexError:
        # No id found: fall through to False
        pass

    return False
def urlopen(self, url, timeout = 30, data = None, headers = None, files = None, show_error = True, stream = False):
    """Open *url* through the shared requests opener with proxy support, default
    headers, per-host rate limiting and automatic temporary blacklisting of hosts
    that keep failing.

    :param url: url to open (will be percent-quoted)
    :param timeout: request timeout in seconds
    :param data: POST payload; a non-empty dict switches the method to POST
    :param headers: extra headers, merged with the defaults below
    :param files: file payload (forces POST)
    :param show_error: when False, raise instead of logging/returning '' on a disabled host
    :param stream: return the response object instead of its content
    :return: response content (or response object when stream=True), '' for disabled hosts
    """
    url = quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")

    if not headers: headers = {}
    if not data: data = {}

    # Fill in some headers
    parsed_url = urlparse(url)
    host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else ''))

    headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host))
    headers['Host'] = headers.get('Host', None)
    headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
    headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
    headers['Connection'] = headers.get('Connection', 'keep-alive')
    headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')

    # Build the proxy mapping from settings, or fall back to environment proxies
    use_proxy = Env.setting('use_proxy')
    proxy_url = None

    if use_proxy:
        proxy_server = Env.setting('proxy_server')
        proxy_username = Env.setting('proxy_username')
        proxy_password = Env.setting('proxy_password')

        if proxy_server:
            loc = "{0}:{1}@{2}".format(proxy_username, proxy_password, proxy_server) if proxy_username else proxy_server
            proxy_url = {
                "http": "http://"+loc,
                "https": "https://"+loc,
            }
        else:
            proxy_url = getproxies()

    r = Env.get('http_opener')

    # Don't try for failed requests
    if self.http_failed_disabled.get(host, 0) > 0:

        # Host stays disabled for 15 minutes (900s) after too many failures
        if self.http_failed_disabled[host] > (time.time() - 900):
            log.info2('Disabled calls to %s for 15 minutes because so many failed requests.', host)
            if not show_error:
                raise Exception('Disabled calls to %s for 15 minutes because so many failed requests' % host)
            else:
                return ''
        else:
            # Cool-down expired: reset the failure bookkeeping for this host
            del self.http_failed_request[host]
            del self.http_failed_disabled[host]

    # Rate limit per host
    self.wait(host, url)
    status_code = None
    try:

        kwargs = {
            'headers': headers,
            'data': data if len(data) > 0 else None,
            'timeout': timeout,
            'files': files,
            'verify': False, #verify_ssl, Disable for now as to many wrongly implemented certificates..
            'stream': stream,
            'proxies': proxy_url,
        }
        method = 'post' if len(data) > 0 or files else 'get'

        log.info('Opening url: %s %s, data: %s', (method, url, [x for x in data.keys()] if isinstance(data, dict) else 'with data'))
        response = r.request(method, url, **kwargs)

        status_code = response.status_code
        if response.status_code == requests.codes.ok:
            data = response if stream else response.content
        else:
            response.raise_for_status()

        # Success: reset the failure counter for this host
        self.http_failed_request[host] = 0
    except (IOError, MaxRetryError, Timeout):
        if show_error:
            log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(0)))

        # Save failed requests by hosts
        try:

            # To many requests
            if status_code in [429]:
                self.http_failed_request[host] = 1
                self.http_failed_disabled[host] = time.time()

            if not self.http_failed_request.get(host):
                self.http_failed_request[host] = 1
            else:
                self.http_failed_request[host] += 1

                # Disable temporarily
                if self.http_failed_request[host] > 5 and not isLocalIP(host):
                    self.http_failed_disabled[host] = time.time()

        except:
            log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc()))

        raise

    self.http_last_use[host] = time.time()

    return data
def is_hidden(self, filepath):
    """Return True when the file is dot-prefixed or carries a hidden attribute."""
    basename = ss(os.path.basename(os.path.abspath(filepath)))

    if basename.startswith('.'):
        return True

    return self.has_hidden_attribute(filepath)
def download(self, data = None, media = None, filedata = None):
    """ Send a torrent/nzb file to the downloader

    :param data: dict returned from provider
        Contains the release information
    :param media: media dict with information
        Used for creating the filename when possible
    :param filedata: downloaded torrent/nzb filedata
        The file gets downloaded in the searcher and send to this function
        This is done to have failed checking before using the downloader, so the
        downloader doesn't need to worry about that
    :return: boolean
        One failure returns false, but the downloader should log its own errors
    """

    if not media: media = {}
    if not data: data = {}

    if not filedata:
        log.error('Unable to get NZB file: %s', traceback.format_exc())
        return False

    log.info('Sending "%s" to NZBGet.', data.get('name'))

    nzb_name = ss('%s.nzb' % self.createNzbName(data, media))

    rpc = self.getRPC()

    try:
        # Probe the connection by writing a log line on the NZBGet side
        if rpc.writelog('INFO', 'CouchPotato connected to drop off %s.' % nzb_name):
            log.debug('Successfully connected to NZBGet')
        else:
            log.info('Successfully connected to NZBGet, but unable to send a message')
    except socket.error:
        log.error('NZBGet is not responding. Please ensure that NZBGet is running and host setting is correct.')
        return False
    except xmlrpclib.ProtocolError as e:
        if e.errcode == 401:
            log.error('Password is incorrect.')
        else:
            log.error('Protocol Error: %s', e)
        return False

    # Old (0.x) NZBGet versions use a different append() signature without priority
    if re.search(r"^0", rpc.version()):
        xml_response = rpc.append(nzb_name, self.conf('category'), False, standard_b64encode(filedata.strip()))
    else:
        xml_response = rpc.append(nzb_name, self.conf('category'), tryInt(self.conf('priority')), False, standard_b64encode(filedata.strip()))

    if xml_response:
        log.info('NZB sent successfully to NZBGet')
        nzb_id = md5(data['url'])  # about as unique as they come ;)
        couchpotato_id = "whatpotato=" + nzb_id
        groups = rpc.listgroups()
        # Tag the just-added queue group so it can be identified later
        file_id = [item['LastID'] for item in groups if item['NZBFilename'] == nzb_name]
        confirmed = rpc.editqueue("GroupSetParameter", 0, couchpotato_id, file_id)
        if confirmed:
            log.debug('whatpotato parameter set in nzbget download')
        return self.downloadReturnId(nzb_id)
    else:
        log.error('NZBGet could not add %s to the queue.', nzb_name)
        return False
def download(self, data = None, media = None, filedata = None):
    """ Send a torrent/nzb file to the downloader

    :param data: dict returned from provider
        Contains the release information
    :param media: media dict with information
        Used for creating the filename when possible
    :param filedata: downloaded torrent/nzb filedata
        The file gets downloaded in the searcher and send to this function
        This is done to have failed checking before using the downloader, so the
        downloader doesn't need to worry about that
    :return: boolean
        One failure returns false, but the downloader should log its own errors
    """

    if not media: media = {}
    if not data: data = {}

    log.info('Sending "%s" to SABnzbd.', data.get('name'))

    req_params = {
        'cat': self.conf('category'),
        'mode': 'addurl',
        'nzbname': self.createNzbName(data, media),
        'priority': self.conf('priority'),
    }

    nzb_filename = None
    if filedata:
        if len(filedata) < 50:
            log.error('No proper nzb available: %s', filedata)
            return False

        # If it's a .rar, it adds the .rar extension, otherwise it stays .nzb
        nzb_filename = self.createFileName(data, filedata, media)
        req_params['mode'] = 'addfile'
    else:
        req_params['name'] = data.get('url')

    try:
        # Fix: compare by value; `is 'addfile'` relied on CPython string interning
        if nzb_filename and req_params.get('mode') == 'addfile':
            sab_data = self.call(req_params, files = {'nzbfile': (ss(nzb_filename), filedata)})
        else:
            sab_data = self.call(req_params)
    except URLError:
        log.error('Failed sending release, probably wrong HOST: %s', traceback.format_exc(0))
        return False
    except:
        log.error('Failed sending release, use API key, NOT the NZB key: %s', traceback.format_exc(0))
        return False

    log.debug('Result from SAB: %s', sab_data)
    nzo_ids = sab_data.get('nzo_ids', [])
    if sab_data.get('status') and not sab_data.get('error') and isinstance(nzo_ids, list) and len(nzo_ids) > 0:
        log.info('NZB sent to SAB successfully.')
        if filedata:
            return self.downloadReturnId(nzo_ids[0])
        else:
            return True
    else:
        log.error('Error getting data from SABNZBd: %s', sab_data)
        return False
def md5(text):
    """Return the hex md5 digest of *text*, byte-encoding it first."""
    digest = hashlib.md5(ss(text))
    return digest.hexdigest()
def scan(self, folder = None, files = None, release_download = None, simple = False, newer_than = 0, return_ignored = True, check_file_date = True, on_found = None):
    """Scan *folder* (or an explicit *files* list) and group its files into
    releases: media files plus their subtitles, nfos, trailers and leftovers.

    :param folder: root folder to walk
    :param files: optional explicit file list; disables the file-age check
    :param release_download: download info dict used to narrow the scan to one group
    :param simple: skip slow extras like subtitle language detection
    :param newer_than: only process groups with files changed after this timestamp
    :param return_ignored: include groups that contain a .ignore file
    :param check_file_date: skip groups whose files look like they're still unpacking
    :param on_found: callback(group, total_found, remaining) fired per processed group
    :return: dict of identifier -> processed group
    """
    folder = sp(folder)

    if not folder or not os.path.isdir(folder):
        log.error('Folder doesn\'t exists: %s', folder)
        return {}

    # Get movie "master" files
    movie_files = {}
    leftovers = []

    # Scan all files of the folder if no files are set
    if not files:
        try:
            files = []
            for root, dirs, walk_files in os.walk(folder, followlinks=True):
                files.extend([sp(os.path.join(sp(root), ss(filename))) for filename in walk_files])

                # Break if CP wants to shut down
                if self.shuttingDown():
                    break

        except:
            log.error('Failed getting files from %s: %s', (folder, traceback.format_exc()))

        log.debug('Found %s files to scan and group in %s', (len(files), folder))
    else:
        check_file_date = False
        files = [sp(x) for x in files]

    for file_path in files:

        if not os.path.exists(file_path):
            continue

        # Remove ignored files
        if self.isSampleFile(file_path):
            leftovers.append(file_path)
            continue
        elif not self.keepFile(file_path):
            continue

        is_dvd_file = self.isDVDFile(file_path)
        if self.filesizeBetween(file_path, self.file_sizes['movie']) or is_dvd_file: # Minimal 300MB files or is DVD file

            # Normal identifier
            identifier = self.createStringIdentifier(file_path, folder, exclude_filename = is_dvd_file)
            identifiers = [identifier]

            # Identifier with quality
            quality = fireEvent('quality.guess', files = [file_path], size = self.getFileSize(file_path), single = True) if not is_dvd_file else {'identifier':'dvdr'}
            if quality:
                identifier_with_quality = '%s %s' % (identifier, quality.get('identifier', ''))
                identifiers = [identifier_with_quality, identifier]

            if not movie_files.get(identifier):
                movie_files[identifier] = {
                    'unsorted_files': [],
                    'identifiers': identifiers,
                    'is_dvd': is_dvd_file,
                }

            movie_files[identifier]['unsorted_files'].append(file_path)
        else:
            leftovers.append(file_path)

        # Break if CP wants to shut down
        if self.shuttingDown():
            break

    # Cleanup
    del files

    # Sort reverse, this prevents "Iron man 2" from getting grouped with "Iron man" as the "Iron Man 2"
    # files will be grouped first.
    leftovers = set(sorted(leftovers, reverse = True))

    # Group files minus extension
    ignored_identifiers = []
    for identifier, group in movie_files.items():
        if identifier not in group['identifiers'] and len(identifier) > 0: group['identifiers'].append(identifier)

        log.debug('Grouping files: %s', identifier)

        has_ignored = 0
        for file_path in list(group['unsorted_files']):
            ext = getExt(file_path)
            wo_ext = file_path[:-(len(ext) + 1)]
            # Pull in leftover files that share the same name without extension
            found_files = set([i for i in leftovers if wo_ext in i])
            group['unsorted_files'].extend(found_files)
            leftovers = leftovers - found_files

            has_ignored += 1 if ext == 'ignore' else 0

        if has_ignored == 0:
            for file_path in list(group['unsorted_files']):
                ext = getExt(file_path)
                has_ignored += 1 if ext == 'ignore' else 0

        if has_ignored > 0:
            ignored_identifiers.append(identifier)

        # Break if CP wants to shut down
        if self.shuttingDown():
            break

    # Create identifiers for all leftover files
    path_identifiers = {}
    for file_path in leftovers:
        identifier = self.createStringIdentifier(file_path, folder)

        if not path_identifiers.get(identifier):
            path_identifiers[identifier] = []

        path_identifiers[identifier].append(file_path)

    # Group the files based on the identifier
    delete_identifiers = []
    for identifier, found_files in path_identifiers.items():
        log.debug('Grouping files on identifier: %s', identifier)

        group = movie_files.get(identifier)
        if group:
            group['unsorted_files'].extend(found_files)
            delete_identifiers.append(identifier)

            # Remove the found files from the leftover stack
            leftovers = leftovers - set(found_files)

        # Break if CP wants to shut down
        if self.shuttingDown():
            break

    # Cleaning up used
    for identifier in delete_identifiers:
        if path_identifiers.get(identifier):
            del path_identifiers[identifier]
    del delete_identifiers

    # Group based on folder
    delete_identifiers = []
    for identifier, found_files in path_identifiers.items():
        log.debug('Grouping files on foldername: %s', identifier)

        for ff in found_files:
            new_identifier = self.createStringIdentifier(os.path.dirname(ff), folder)

            group = movie_files.get(new_identifier)
            if group:
                group['unsorted_files'].extend([ff])
                delete_identifiers.append(identifier)

                # Remove the found files from the leftover stack
                # NOTE(review): this removes everything EXCEPT ff from leftovers;
                # upstream uses `leftovers = leftovers - set([ff])` — confirm intended
                leftovers -= leftovers - set([ff])

            # Break if CP wants to shut down
            if self.shuttingDown():
                break

    # leftovers should be empty
    if leftovers:
        log.debug('Some files are still left over: %s', leftovers)

    # Cleaning up used
    for identifier in delete_identifiers:
        if path_identifiers.get(identifier):
            del path_identifiers[identifier]
    del delete_identifiers

    # Make sure we remove older / still extracting files
    valid_files = {}
    while True and not self.shuttingDown():
        try:
            identifier, group = movie_files.popitem()
        except:
            break

        # Check if movie is fresh and maybe still unpacking, ignore files newer than 1 minute
        if check_file_date:
            files_too_new, time_string = self.checkFilesChanged(group['unsorted_files'])
            if files_too_new:
                log.info('Files seem to be still unpacking or just unpacked (created on %s), ignoring for now: %s', (time_string, identifier))

                # Delete the unsorted list
                del group['unsorted_files']

                continue

        # Only process movies newer than x
        if newer_than and newer_than > 0:
            has_new_files = False
            for cur_file in group['unsorted_files']:
                file_time = self.getFileTimes(cur_file)
                if file_time[0] > newer_than or file_time[1] > newer_than:
                    has_new_files = True
                    break

            if not has_new_files:
                log.debug('None of the files have changed since %s for %s, skipping.', (time.ctime(newer_than), identifier))

                # Delete the unsorted list
                del group['unsorted_files']

                continue

        valid_files[identifier] = group

    del movie_files

    total_found = len(valid_files)

    # Make sure only one movie was found if a download ID is provided
    if release_download and total_found == 0:
        log.info('Download ID provided (%s), but no groups found! Make sure the download contains valid media files (fully extracted).', release_download.get('imdb_id'))
    elif release_download and total_found > 1:
        log.info('Download ID provided (%s), but more than one group found (%s). Ignoring Download ID...', (release_download.get('imdb_id'), len(valid_files)))
        release_download = None

    # Determine file types
    processed_movies = {}
    while True and not self.shuttingDown():
        try:
            identifier, group = valid_files.popitem()
        except:
            break

        if return_ignored is False and identifier in ignored_identifiers:
            log.debug('Ignore file found, ignoring release: %s', identifier)
            total_found -= 1
            continue

        # Group extra (and easy) files first
        group['files'] = {
            'movie_extra': self.getMovieExtras(group['unsorted_files']),
            'subtitle': self.getSubtitles(group['unsorted_files']),
            'subtitle_extra': self.getSubtitlesExtras(group['unsorted_files']),
            'nfo': self.getNfo(group['unsorted_files']),
            'trailer': self.getTrailers(group['unsorted_files']),
            'leftover': set(group['unsorted_files']),
        }

        # Media files
        if group['is_dvd']:
            group['files']['movie'] = self.getDVDFiles(group['unsorted_files'])
        else:
            group['files']['movie'] = self.getMediaFiles(group['unsorted_files'])

        if len(group['files']['movie']) == 0:
            log.error('Couldn\'t find any movie files for %s', identifier)
            total_found -= 1
            continue

        log.debug('Getting metadata for %s', identifier)
        group['meta_data'] = self.getMetaData(group, folder = folder, release_download = release_download)

        # Subtitle meta
        group['subtitle_language'] = self.getSubtitleLanguage(group) if not simple else {}

        # Get parent dir from movie files
        for movie_file in group['files']['movie']:
            group['parentdir'] = os.path.dirname(movie_file)
            group['dirname'] = None

            folder_names = group['parentdir'].replace(folder, '').split(os.path.sep)
            folder_names.reverse()

            # Try and get a proper dirname, so no "A", "Movie", "Download" etc
            for folder_name in folder_names:
                if folder_name.lower() not in self.ignore_names and len(folder_name) > 2:
                    group['dirname'] = folder_name
                    break

            break

        # Leftover "sorted" files
        for file_type in group['files']:
            # NOTE(review): `not file_type is 'leftover'` identity-compares a str
            # literal; works via CPython interning but should be `!=` — confirm
            if not file_type is 'leftover':
                group['files']['leftover'] -= set(group['files'][file_type])
                group['files'][file_type] = list(group['files'][file_type])
        group['files']['leftover'] = list(group['files']['leftover'])

        # Delete the unsorted list
        del group['unsorted_files']

        # Determine movie
        group['media'] = self.determineMedia(group, release_download = release_download)
        if not group['media']:
            log.error('Unable to determine media: %s', group['identifiers'])
        else:
            group['identifier'] = getIdentifier(group['media']) or group['media']['info'].get('imdb')

        processed_movies[identifier] = group

        # Notify parent & progress on something found
        if on_found:
            on_found(group, total_found, len(valid_files))

        # Wait for all the async events calm down a bit
        while threading.activeCount() > 100 and not self.shuttingDown():
            log.debug('Too many threads active, waiting a few seconds')
            time.sleep(10)

    if len(processed_movies) > 0:
        log.info('Found %s movies in the folder %s', (len(processed_movies), folder))
    else:
        log.debug('Found no movies in the folder %s', folder)

    return processed_movies
def add_torrent_file(self, filename, filedata, add_folder = False):
    """Upload a .torrent file to the client, optionally placing it in its own folder."""
    action = 'action=add-file'
    if add_folder:
        action = '%s&path=%s' % (action, urllib.quote(filename))

    return self._request(action, {'torrent_file': (ss(filename), filedata)})