def context(self, ops):
    """Attach the incoming blockchain operation to this watcher.

    Wraps *ops* in a SteemComment when it is already a Comment,
    otherwise in a generic SteemOperation, then logs the wrapped
    operation's URL and tags at debug level.
    """
    wrapper = (SteemComment(comment=ops)
               if isinstance(ops, Comment)
               else SteemOperation(ops=ops))
    self.ops = wrapper
    logger.debug("watch operation: {}; tags: {}".format(
        wrapper.get_url(), wrapper.get_tags()))
def make_queued():
    """Queue a video-making job and return its id without blocking.

    Registers the request payload under a fresh random id in the shared
    ``working_status`` map and hands the actual work to a background
    thread running ``proceed``.
    """
    if not request.json:
        return abort(400)
    payload = request.get_json()
    logger.debug(payload)
    job_id = random_string()
    working_status[job_id] = {'data': payload, 'status': 'NOT STARTED'}
    worker = threading.Thread(target=proceed, args=(job_id,))
    worker.start()
    return json.dumps({'type': 'ok', 'id': job_id})
def get_metadata(self):
    """Parse and return this operation's ``json_metadata`` as a dict.

    Returns:
        dict | None: the decoded metadata when ``json_metadata`` is
        present, non-empty, and decodes to a dict; ``None`` otherwise
        (including when decoding fails).
    """
    if 'json_metadata' in self.ops and len(self.ops['json_metadata']) > 0:
        try:
            metadata = json.loads(self.ops['json_metadata'])
            if metadata and isinstance(metadata, dict):
                return metadata
            else:
                # bug fix: the original format string had no {} placeholder,
                # so the offending payload was silently dropped from the log
                logger.debug("not well formatted metadata: {}".format(
                    self.ops['json_metadata']))
        except Exception:
            # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # are no longer swallowed
            logger.debug("failed when parsing metadata. Error: {}".format(
                traceback.format_exc()))
    return None
def get_status():
    """Report the progress of a previously queued job.

    Expects a JSON body with an ``id`` field and answers with the job's
    current state: ``process`` (progress message), ``error`` (failure
    reason) or ``ready`` (result URL).
    """
    if not request.json:
        return abort(400)
    data = request.get_json()
    logger.debug(data)
    if 'id' not in data:
        return make_error("Id not found")
    current_id = data['id']
    logger.debug(current_id)
    logger.debug(working_status)
    if current_id not in working_status:
        return make_error("Unknown id: {:s}".format(current_id))
    state = working_status[current_id]
    status = state['status']
    if status == 'process':
        return json.dumps({
            'status': 'process',
            'message': state['message']
        })
    elif status == 'error':
        return json.dumps({
            'status': 'error',
            'error': state['error']
        })
    elif status == 'ready':
        return json.dumps({
            'status': 'ready',
            'url': state['url']
        })
    # bug fix: the original fell off the end for any other status (e.g. the
    # initial 'NOT STARTED'), implicitly returning None, which Flask rejects
    # with a TypeError. Answer with an explicit error instead.
    logger.debug(state)
    return make_error("Unknown status: {}".format(status))
def load_from_link(link):
    """Download *link* and extract its article text.

    Fetches the page, parses it with BeautifulSoup and concatenates the
    text of every ``<p>`` element.

    Returns:
        tuple: ``(article_text, None)`` on success, or
        ``(None, error_message)`` when the download or parse fails.
    """
    try:
        scraped_data = urllib.request.urlopen(link)
        article = scraped_data.read()
        parsed_article = bs.BeautifulSoup(article, 'lxml')
        # join at C speed instead of quadratic += concatenation
        article_text = "".join(
            p.text for p in parsed_article.find_all('p'))
        logger.debug(article_text)
        return article_text, None
    except Exception as error:
        # deliberate best-effort: any failure maps to one user-facing message
        logger.error(error)
        return None, "We cannot download article for your link"
def download(self, href, config):
    """Download the YouTube video behind *href* as an mp4 file.

    Picks the best progressive=False mp4 stream whose resolution does
    not exceed ``config['height']``. Results are cached on disk; hrefs
    that failed before are remembered in ``self.errored`` and skipped.

    Returns:
        str | None: path to the downloaded file, or ``None`` when the
        video was previously errored, has no suitable stream, or the
        download fails.
    """
    logger.warning(self.errored)
    # assumes href looks like ...watch?v=<token> — TODO confirm for all callers
    token = href.split('=')[1]
    file_name = token + '-' + str(config['height'])
    file_path = DOWNLOAD_PATH + "/" + file_name + ".mp4"
    if path.exists(file_path):
        logger.debug('Already exists')
        return file_path
    if href in self.errored:
        # bug fix: extra positional args to logging need a %s placeholder,
        # otherwise the logging module raises a formatting error
        logger.warning('Was errored before: %s', href)
        return None
    else:
        logger.debug('Wasn\'t errored')
    try:
        yt = pytube.YouTube(href)
        video_filter = yt.streams\
            .filter(subtype='mp4') \
            .filter(progressive=False)
        # find the highest resolution not above the requested height
        quality = 0
        for video in video_filter.all():
            resolution = video.resolution
            logger.debug(f"get {video.url}")
            if resolution is not None:
                resolution = int(video.resolution.replace('p', ''))
                if resolution <= config['height'] and resolution >= quality:
                    quality = resolution
        video_filter = video_filter.filter(resolution=str(quality) + "p")
        video = video_filter.first()
        logger.info("Quality: " + str(quality) + "p")
        if video is None:
            # no usable stream: remember the failure so we don't retry
            self.errored[href] = True
            self.__save_cache__()
            return None
        # stray print() replaced with the module logger; the unused local
        # `subtype = video.subtype` from the original was removed
        logger.debug(f"Downloading {DOWNLOAD_PATH}")
        video.download(
            DOWNLOAD_PATH,
            filename=file_name
        )
        return file_path
    except Exception as error:
        # bug fix: same missing-placeholder issue as above
        logger.error('Error handled: %s', error)
        self.errored[href] = True
        self.__save_cache__()
        return None
def load_image(img_url):
    """Download *img_url*, resize it to IMAGE_WIDTH x IMAGE_HEIGHT and
    cache it on disk as a JPEG.

    The cache key is a hash of the URL; a previously downloaded image
    is returned without re-fetching.

    Returns:
        str: path to the cached JPEG file.
    """
    file_name = os.path.join(DOWNLOAD_PATH, hsh(img_url) + '.jpg')
    logger.debug(f"file for image {file_name}")
    if os.path.exists(file_name):
        logger.debug("find image")
        return file_name
    response = requests.get(img_url)
    img = Image.open(BytesIO(response.content))
    # NOTE(review): the original rounded width/height down to even values
    # here, but those locals were never used — the image is always resized
    # to the fixed IMAGE_WIDTH x IMAGE_HEIGHT — so the dead code is removed.
    # Image.ANTIALIAS is a deprecated alias of Image.LANCZOS in recent
    # Pillow releases; kept as-is to match the Pillow version in use.
    img = img.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.ANTIALIAS)
    img.save(file_name)
    logger.debug(
        f"Image with size: {img.width} {img.height} save to {file_name}")
    return file_name
def search():
    """Analyze posted text (or a link's article text) into a storyboard.

    Validates the request (``type`` must be ``'link'`` or ``'text'``
    with the matching field present), runs the analyzer, and filters
    video elements against the bad-video set before returning the data
    as JSON.
    """
    if not request.json:
        return abort(400)
    if DEMO:
        return load_json("beta/search_top.json")
    req = request.get_json()
    logger.debug(req)
    config = Config()
    error = None
    if 'type' not in req:
        error = "Type field cannot be empty"
    reqType = None
    if error is None:
        reqType = str(req['type'])
        if reqType not in ['link', 'text']:
            error = "Unknown request type: " + reqType
    if error is not None:
        return make_error(error)
    if reqType == 'text' and 'text' not in req:
        error = "Text field is empty"
    if reqType == 'link' and 'link' not in req:
        error = "Link field is empty"
    logger.info("Request type " + reqType)
    if error is not None:
        return make_error(error)
    data = ''
    if reqType == 'text':
        data = req["text"]
    else:
        data, error = load_from_link(req['link'])
        if error is not None:
            return make_error(error)
    data, error = config.analyzer.analyze(data)
    if error is not None:
        return make_error(error)
    # NOTE(review): bad_videos is never populated in this function, so the
    # filter below currently keeps every element — confirm whether it was
    # meant to be filled elsewhere. (The unused `videos = []` local and a
    # stray bare print() were removed.)
    bad_videos = {}
    logger.debug(data)
    for sentence in data['data']:
        logger.debug(sentence)
        buffer = []
        for elem in data['data'][sentence]:
            logger.debug(elem)
            if elem['type'] == 'video':
                if elem['href'] not in bad_videos:
                    buffer.append(elem)
                else:
                    # bug fix: the original concatenated the dict `elem`
                    # to a str, which raises TypeError; log the href
                    logger.warning("Found bad video: " + elem['href'])
            else:
                buffer.append(elem)
        data['data'][sentence] = buffer
    return json.dumps(data)
def __save_cache__(self):
    """Persist the map of previously failed downloads to error_cache.txt."""
    logger.debug('Saving cache')
    with open('error_cache.txt', 'w') as out:
        json.dump(self.errored, out)
def make():
    """Render a video synchronously from the posted JSON description."""
    if not request.json:
        return abort(400)
    payload = request.get_json()
    logger.debug(payload)
    return make_video(payload)
def make_video(data, current_id=None):
    """Build a video from the interval description in *data*.

    Dumps the raw request to make_req.json for debugging, downloads every
    referenced video/image interval, then hands the collected intervals to
    ``config.maker.make``. When *current_id* is given, progress/ready/error
    states are pushed into the shared status map via the set*Status helpers.

    Returns a JSON string: ``{'type': 'ok', 'url': ...}`` on success or
    ``{'type': 'error', 'error': ...}`` on failure.
    """
    # keep a copy of the last request on disk for post-mortem debugging
    with open("make_req.json", 'w') as out:
        out.write(json.dumps(data))
    ints = []          # collected VideoInterval/ImageInterval objects
    error = None
    video_config = load_config(data)
    logger.debug(video_config)
    # NOTE(review): this uses snake_case set_process_status here but
    # camelCase setProcessStatus below — confirm both helpers exist.
    set_process_status(current_id, "Инициализация")
    download_time = 0
    download_start = 0
    download_finish = 0
    if not 'intervals' in data:
        error = "Unknown request format"
    else:
        download_start = time.time()
        intervals = data['intervals']
        index = 0
        for interval in intervals:
            logger.debug(interval)
            if not 'type' in interval:
                error = 'For one or more intervals type is not specified'
                break
            if interval['type'] == 'video':
                # validate, download, and wrap a video interval
                error = checkVideoInterval(interval)
                if error != None:
                    break
                video_loaded = config.downloader.download(
                    interval['href'], video_config)
                if video_loaded == None:
                    error = "Sorry but we cannot download one or more of videos you selected"
                    break
                video_src = video_loaded
                logger.debug(video_src)
                ints.append(
                    VideoInterval(interval['begin'], interval['end'],
                                  interval['text'], video_src,
                                  interval['video_begin'],
                                  interval['video_end']))
            elif interval['type'] == 'image':
                # validate, download, and wrap an image interval
                error = checkImageInterval(interval)
                if error != None:
                    break
                image_src = load_image(interval['href'])
                logger.debug(image_src)
                ints.append(
                    ImageInterval(interval['begin'], interval['end'],
                                  interval['text'], image_src))
            index += 1
            # progress message: "Loaded: <index>/<total>"
            setProcessStatus(
                current_id,
                "Загружено: {:d}/{:d}".format(index, len(intervals)))
        # status message: "All videos loaded successfully"
        setProcessStatus(current_id, "Все видео успешно загружены")
        download_finish = time.time()
        download_time = download_finish - download_start
    if error == None:
        # render the final video and time the making stage
        making_time = 0
        make_begin = time.time()
        res_file = config.maker.make(ints, "none", video_config,
                                     current_id=current_id,
                                     icon=None, overlay=None)
        make_end = time.time()
        making_time = make_end - make_begin
        logger.info("Download time: {:.2f}, making time: {:.2f}".format(
            download_time, making_time))
        if current_id != None:
            setReadyStatus(current_id, res_file)
        return json.dumps({'type': 'ok', 'url': res_file})
    else:
        if current_id != None:
            setErrorStatus(current_id, str(error))
        return json.dumps({'type': 'error', 'error': str(error)})
def find_images(text, limit=1):
    """Search Yandex Images for *text* and return up to *limit* image URLs.

    Scrapes the search results page for preview links, renders each
    preview page with requests_html, and extracts the full-size image
    ``src`` attribute.

    Returns:
        list[str]: resolved image URLs; empty list on any failure.
    """
    logger.debug(f"start find images for {text}")
    try:
        query = urllib.parse.urlencode({"text": text})
        url = "https://yandex.ru/images/search?" + query
        logger.debug(f"get url {url}")
        response = urllib.request.urlopen(url)
        html = response.read()
        soup = BeautifulSoup(html, "html.parser")
        links = [image["href"]
                 for image in soup.find_all(attrs={"class": "serp-item__link"})]
        logger.debug(f"get {len(links)} images before limiting")
        limit = min(limit, len(links))
        links = links[:limit]
        logger.info(f"get {len(links)} images")
        hrefs = []
        for href in links:
            url = "https://yandex.ru" + href
            logger.debug(f"get image from {url}")
            session = HTMLSession()
            response = session.get(url)
            response.html.render()
            logger.debug(response.html)
            # bug fix: the original reused the name `images` here, clobbering
            # the outer result list and obscuring the loop's intent
            previews = response.html.find('.preview2__arrow-image')
            logger.debug(previews)
            src = None
            for preview in previews:
                logger.debug(preview.attrs)
                try:
                    test_src = preview.attrs["src"]
                    # bug fix: the original logged the stale `src` (still
                    # None) instead of the value just found
                    logger.debug(f"find src {test_src}")
                    src = test_src
                    break
                except KeyError as error:
                    # narrowed: only the missing-"src" case is expected here
                    logger.error(error)
            if src is not None:
                hrefs.append(src)
        return hrefs
    except Exception as error:
        # best-effort scraper: any failure yields an empty result
        logger.error(error)
        return []