def handleTraceback(object):
    context = object.context
    entry_url = object.entry_url

    if entry_url is None:
        return

    LOGGER.info("handle traceback [%s]" % entry_url)

    try:
        cleanup_lock.acquire()
        # we don't want to produce any errors here, thus, we'll be nice and die
        # silently if an error occurs here
        try:
            transaction.begin()
            # get our logbook view to use the api
            logbook = context.unrestrictedTraverse('@@logbook')
            # get the generated error url from Products.SiteErrorLog
            err_id = urllib.splitvalue(entry_url)[1]
            # save error
            logbook.save_error(err_id, context=aq_parent(context))
            transaction.get().note('collective.logbook traceback [%s]' % entry_url)
            transaction.commit()
        finally:
            cleanup_lock.release()
    # only warning
    except Exception as e:
        LOGGER.warning("An error occurred while handling the traceback")
        LOGGER.warning("%s" % e)
        LOGGER.exception(e)
def upload_file():
    try:
        uploaded_file = request.files['audio_data']
        if uploaded_file.filename != '':
            uploaded_file.save('/tmp/' + uploaded_file.filename + '.wav')
            s3 = boto3.client('s3')
            s3.upload_file('/tmp/' + uploaded_file.filename + '.wav',
                           'datasets-masters-2020',
                           uploaded_file.filename + '.wav')
        return "Success", 200
    except Exception as e:
        logger.exception(e)
        return "Error", 500
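# upload_file() above reads request.files, so it is presumably a Flask view.
# The wiring below is a minimal, assumed sketch only: the route path, app
# object, and logger setup are not part of the original snippet.
import logging
import boto3
from flask import Flask, request

logger = logging.getLogger(__name__)
app = Flask(__name__)
# register the view defined above under a hypothetical /upload endpoint
app.add_url_rule('/upload', view_func=upload_file, methods=['POST'])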
def facebook_convert_to_id(fb_url):
    # https://www.facebook.com/profile.php?id=1000
    url_parse = urlparse(fb_url)
    if "facebook.com" in url_parse.netloc:
        if "/profile.php" in url_parse.path:
            preg = r"id=(\d{1,})"
            m = re.findall(preg, url_parse.query)
            if len(m):
                LOGGER.info(
                    'successfully converted facebook id for url : [%s to %s]' %
                    (fb_url, m[0]))
                return m[0]
        # https://facebook.com/zuck
        headers = get_headers()
        csrfmiddlewaretoken = get_csrfmiddlewaretoken(headers)
        if csrfmiddlewaretoken:
            data = {
                "csrfmiddlewaretoken": csrfmiddlewaretoken,
                "fburl": fb_url
            }
            headers.update({
                "Cookie": "csrftoken={csrfmiddlewaretoken}".format(
                    csrfmiddlewaretoken=csrfmiddlewaretoken)
            })
            try:
                r = requests.post('http://findmyfbid.in/',
                                  data=data,
                                  timeout=10,
                                  allow_redirects=False,
                                  proxies=PROXIES,
                                  headers=headers)
                location = r.headers['Location']
                preg = r'\d{1,}'
                m = re.findall(preg, location)
                if len(m):
                    LOGGER.info(
                        'successfully converted facebook id for url : [%s to %s]' %
                        (fb_url, m[0]))
                    return m[0]
                else:
                    return None
            except Exception as e:
                LOGGER.exception(e)
                return None
        else:
            return None
    else:
        return None
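# get_headers() is called in facebook_convert_to_id() but is not defined in
# this snippet. A plausible minimal version is sketched below; the exact
# header set used by the project is an assumption.
def get_headers() -> dict:
    return {
        'user-agent': get_random_user_agent(),
        'accept': 'text/html,application/xhtml+xml,*/*',
    }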
async def fetch(client, url):
    with async_timeout.timeout(15):
        try:
            headers = {'user-agent': get_random_user_agent()}
            async with client.get(url, headers=headers) as response:
                assert response.status == 200
                LOGGER.info('Task url: {}'.format(response.url))
                try:
                    text = await response.text()
                except:
                    # fall back to raw bytes if the body cannot be decoded as text
                    text = await response.read()
                return text
        except Exception as e:
            LOGGER.exception(e)
            return None
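# A minimal, assumed driver for fetch(): the session handling and entry point
# are not shown in the original, so this is only a sketch of how the coroutine
# might be invoked.
import asyncio
import aiohttp

async def main():
    async with aiohttp.ClientSession() as client:
        html = await fetch(client, 'https://www.example.com')
        if html is not None:
            print('fetched %d characters' % len(html))

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(main())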
def get_csrfmiddlewaretoken(headers) -> str:
    csrfmiddlewaretoken = ''
    try:
        result = requests.get(url="http://findmyfbid.in/",
                              headers=headers,
                              proxies=PROXIES,
                              timeout=10).text
        preg = r"name='csrfmiddlewaretoken' value='(.*?)'"
        preg_token = re.findall(preg, result)
        if preg_token:
            csrfmiddlewaretoken = preg_token[0]
            LOGGER.info('successfully get csrfmiddlewaretoken: [%s]' %
                        (csrfmiddlewaretoken))
    except Exception as e:
        LOGGER.exception(e)
    return csrfmiddlewaretoken
def process_file(event, context):
    try:
        bucket = event['Records'][0]['s3']['bucket']['name']
        s3_client = boto3.client('s3')
        key = event['Records'][0]['s3']['object']['key']
        if key.split('/')[0] != 'splits':
            # Get the bytes from S3
            file_loc = '/tmp/' + key  # Download this file to writable tmp space.
            logger.debug(file_loc)
            logger.debug(key)
            logger.debug(bucket)
            s3_client.download_file(bucket, key, file_loc)
            song = AudioSegment.from_wav(file_loc)
            dBFS = song.dBFS
            chunks = split_on_silence(
                song,
                min_silence_len=1000,
                # anything under -16 dBFS is considered silence
                silence_thresh=dBFS - 16,
                # keep 200 ms of leading/trailing silence
                keep_silence=200)
            logger.debug(chunks)
            for i, chunk in enumerate(chunks):
                silence_chunk = AudioSegment.silent(duration=200)
                audio_chunk = silence_chunk + chunk + silence_chunk
                normalized_chunk = match_target_amplitude(audio_chunk, -20.0)
                logger.debug("Exporting chunk{0}.mp3.".format(i))
                normalized_chunk.export("/tmp/chunk{0}.mp3".format(i),
                                        bitrate="320k",
                                        format="mp3")
                s3_client.upload_file(
                    "/tmp/chunk{0}.mp3".format(i),
                    'datasets-masters-2020',
                    "splits/{0}/chunk_{1}.mp3".format(key.split('.')[0], i))
            return
        else:
            logger.debug('Nothing to do here')
            return
    except Exception as e:
        logger.exception(e)
        return "Error", 500
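# match_target_amplitude() is used above but not included in this snippet.
# The sketch below is the standard pydub gain-normalization recipe and is only
# an assumption about what the project's helper actually does.
def match_target_amplitude(chunk, target_dBFS):
    # apply whatever gain is needed to bring the chunk's loudness to target_dBFS
    change_in_dBFS = target_dBFS - chunk.dBFS
    return chunk.apply_gain(change_in_dBFS)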
async def data_extraction_for_phone(html):
    with async_timeout.timeout(10):
        try:
            # Get url from the data-log attribute (a dict-like string)
            data_log = eval(html['data-log'])
            url = data_log.get('mu', None)
            if not url:
                return None
            # Get title
            title = html.find('h3').get_text()
            # Get author and update_time (optional)
            novel_mess = html.findAll(class_='c-gap-right-large')
            basic_mess = [i.get_text() for i in novel_mess] if novel_mess else None
            return {'title': title, 'url': url, 'basic_mess': basic_mess}
        except Exception as e:
            LOGGER.exception(e)
            return None
async def get_photo_url(self, loop) -> bool:
    """
    Get user photo's url
    :return: True or False
    """
    async with aiohttp.ClientSession(loop=None) as client:
        await asyncio.sleep(1)
        html = await self.fetch(client=client, url=self.single_url)
        if html:
            preg_type = r"<meta\s*property=\"og:type\"\s*content=\"(.*?)\"\s*/>"
            type = re.findall(preg_type, str(html))
            if type:
                if type[0] == "video":
                    preg_url = r"<meta\s*property=\"og:video\"\s*content=\"(.*?)\"\s*/>"
                else:
                    preg_url = r"<meta\s*property=\"og:image\"\s*content=\"(.*?)\"\s*/>"
                target_url_res = re.findall(preg_url, str(html))
                if target_url_res:
                    target_url = target_url_res[0]
                    target_name = target_url[-25:].replace('/', '-')
                    if not os.path.exists(self._dir + target_name):
                        await asyncio.sleep(random.randint(1, 2))
                        target_result = await self.fetch(client=client, url=target_url)
                        if target_result:
                            LOGGER.info("Downloading {target_url}".format(
                                target_url=target_url))
                            try:
                                with open(self._dir + target_name, 'wb') as file:
                                    file.write(target_result)
                                LOGGER.info(
                                    'File downloaded successfully in {dir}'
                                    .format(dir=self._dir + target_name))
                                return True
                            except Exception as e:
                                LOGGER.exception(e)
                                return False
                    else:
                        return True
                return False
            else:
                return False
async def fetch(self, client, url) -> bytes:
    """
    fetch url
    :param client: aiohttp client
    :param url: request url
    :return: response.read()
    """
    with async_timeout.timeout(10):
        try:
            headers = {'user-agent': self.get_random_user_agent()}
            proxy = PROXIES if PROXIES else None
            async with client.get(url, headers=headers, proxy=proxy) as response:
                assert response.status == 200
                LOGGER.info('Task url: {}'.format(response.url))
                text = await response.read()
                return text
        except Exception as e:
            LOGGER.exception(e)
            return None
async def get_real_url(client, url):
    with async_timeout.timeout(10):
        try:
            headers = {'user-agent': get_random_user_agent()}
            async with client.get(url, headers=headers, allow_redirects=True) as response:
                assert response.status == 200
                LOGGER.info('Parse url: {}'.format(response.url))
                # text = ""
                # try:
                #     text = await response.text()
                # except:
                #     text = await response.read()
                # if text:
                #     print(text)
                #     text = re.findall(r'replace\(\"(.*?)\"\)', str(text))
                #     text = text[0] if text[0] else ""
                url = response.url if response.url else None
                return url
        except Exception as e:
            LOGGER.exception(e)
            return None
def Cold_boot(self, url, pause=2):
    time.sleep(pause)
    headers = {'user-agent': self.get_random_user_agent()}
    try:
        requests.packages.urllib3.disable_warnings(
            requests.packages.urllib3.exceptions.InsecureRequestWarning)
        r = requests.get(url=url,
                         proxies=self.proxies,
                         headers=headers,
                         allow_redirects=False,
                         verify=False,
                         timeout=30)
        LOGGER.info(url)
        content = r.content
        charset = cchardet.detect(content)
        text = content.decode(charset['encoding'])
        bsObj = BeautifulSoup(text, "lxml")
        return bsObj
    except Exception as e:
        LOGGER.exception(e)
        return None