예제 #1
0
def handleTraceback(object):
    """Store a site-error-log entry through the ``@@logbook`` view.

    Reads ``context`` and ``entry_url`` from *object* and does nothing when
    ``entry_url`` is ``None``. The save runs in its own transaction while
    ``cleanup_lock`` is held. Exceptions raised while saving are logged and
    swallowed, so a failure here never masks the error being recorded.

    :param object: carrier exposing ``context`` and ``entry_url`` attributes.
        The name shadows the builtin but is kept for existing callers.
    :return: always ``None``.
    """
    context = object.context
    entry_url = object.entry_url

    if entry_url is None:
        return

    LOGGER.info("handle traceback [%s]" % entry_url)
    try:
        cleanup_lock.acquire()
        # we don't want to produce any errors here, thus, we'll be nice and die
        # silently if an error occurs here
        try:
            transaction.begin()
            # get our logbook view to use the api
            logbook = context.unrestrictedTraverse('@@logbook')
            # the error id is the value part of the generated entry url
            # NOTE(review): top-level urllib.splitvalue is the Python 2
            # location of this helper; confirm before running on Python 3.
            err_id = urllib.splitvalue(entry_url)[1]
            # save error
            logbook.save_error(err_id, context=aq_parent(context))
            transaction.get().note('collective.logbook traceback [%s]' %
                                   entry_url)
            transaction.commit()
        finally:
            # the lock is released whether or not the save succeeded
            cleanup_lock.release()
    # only warning
    except Exception as e:  # "as" form parses on Python 2.6+ and Python 3
        LOGGER.warning("An error occured while handling the traceback")
        LOGGER.warning("%s" % e)
        LOGGER.exception(e)
예제 #2
0
def upload_file():
    """Store the uploaded ``audio_data`` file in S3 as ``<filename>.wav``.

    The upload is written under ``/tmp`` first and then pushed to the
    ``datasets-masters-2020`` bucket. An upload whose filename is empty is
    skipped and still reported as success.

    :return: ``("Success", 200)``, or ``("Error", 500)`` when anything
        raises (the exception is logged).
    """
    import os  # local import: the rest of the file may not import it

    try:
        uploaded_file = request.files['audio_data']
        if uploaded_file.filename != '':
            # The client controls the filename: keep only its last path
            # component so a name such as "../../x" cannot leave /tmp.
            safe_name = os.path.basename(uploaded_file.filename)
            if not safe_name:
                raise ValueError(
                    'unusable upload filename: %r' % uploaded_file.filename)
            object_name = safe_name + '.wav'
            local_path = '/tmp/' + object_name
            uploaded_file.save(local_path)
            try:
                s3 = boto3.client('s3')
                s3.upload_file(local_path,
                               'datasets-masters-2020',
                               object_name)
            finally:
                # do not let temporary copies pile up in /tmp
                os.remove(local_path)
        return "Success", 200
    except Exception as e:
        logger.exception(e)
        return "Error", 500
예제 #3
0
def facebook_convert_to_id(fb_url):
    """Resolve a Facebook profile URL to its numeric id.

    ``/profile.php`` URLs whose query contains ``id=<digits>`` are answered
    from the query string alone. Every other facebook.com URL is posted to
    ``findmyfbid.in`` and the id is the first run of digits found in the
    ``Location`` header of the response.

    :param fb_url: profile URL, e.g.
        ``https://www.facebook.com/profile.php?id=1000`` or
        ``https://facebook.com/zuck``.
    :return: the id as a string of digits, or ``None`` when the host does
        not contain ``facebook.com``, no CSRF token could be fetched, the
        lookup request fails, or no digits are found.
    """
    url_parse = urlparse(fb_url)
    if "facebook.com" not in url_parse.netloc:
        return None

    # https://www.facebook.com/profile.php?id=1000
    if "/profile.php" in url_parse.path:
        # raw string: "\d" in a plain literal is an invalid escape sequence
        m = re.findall(r"id=(\d{1,})", url_parse.query)
        if m:
            LOGGER.info(
                'successfully converted facebook id for url : [%s to %s]' %
                (fb_url, m[0]))
            return m[0]

    # https://facebook.com/zuck
    headers = get_headers()
    csrfmiddlewaretoken = get_csrfmiddlewaretoken(headers)
    if not csrfmiddlewaretoken:
        return None

    data = {
        "csrfmiddlewaretoken": csrfmiddlewaretoken,
        "fburl": fb_url
    }
    headers.update({
        "Cookie":
        "csrftoken={csrfmiddlewaretoken}".format(
            csrfmiddlewaretoken=csrfmiddlewaretoken)
    })
    try:
        # redirects are not followed: the id is read from the Location
        # header of the immediate response
        r = requests.post('http://findmyfbid.in/',
                          data=data,
                          timeout=10,
                          allow_redirects=False,
                          proxies=PROXIES,
                          headers=headers)
        location = r.headers['Location']
    except Exception as e:
        # network failure or missing Location header
        LOGGER.exception(e)
        return None

    m = re.findall(r'\d{1,}', location)
    if not m:
        return None
    LOGGER.info(
        'successfully converted facebook id for url : [%s to %s]'
        % (fb_url, m[0]))
    return m[0]
예제 #4
0
async def fetch(client, url):
    """Download *url* with a random user agent and return its body.

    :param client: session object whose ``get`` returns an async context
        manager (presumably an ``aiohttp.ClientSession`` - confirm).
    :param url: address to request.
    :return: the decoded text, the raw body when decoding raises, or
        ``None`` on a non-200 status, a timeout or any other error.
    """
    try:
        # The timeout sits inside the try so the TimeoutError it raises is
        # handled below instead of escaping to the caller. "async with" is
        # the supported form; the plain "with" form is deprecated.
        async with async_timeout.timeout(15):
            headers = {'user-agent': get_random_user_agent()}

            async with client.get(url, headers=headers) as response:
                # explicit check: an assert disappears under "python -O"
                if response.status != 200:
                    LOGGER.warning('Unexpected status %s for %s',
                                   response.status, url)
                    return None
                LOGGER.info('Task url: {}'.format(response.url))
                try:
                    return await response.text()
                except Exception:
                    # fall back to the undecoded body; unlike a bare
                    # except this lets task cancellation propagate
                    return await response.read()
    except Exception as e:
        LOGGER.exception(e)
        return None
예제 #5
0
def get_csrfmiddlewaretoken(headers) -> str:
    """Scrape the CSRF token from the findmyfbid.in landing page.

    :param headers: HTTP headers sent with the GET request.
    :return: the first token found in the page, or an empty string when
        none is found or the request raises (the exception is logged).
    """
    token = ''
    token_pattern = r"name='csrfmiddlewaretoken' value='(.*?)'"
    try:
        response = requests.get(url="http://findmyfbid.in/",
                                headers=headers,
                                proxies=PROXIES,
                                timeout=10)
        found = re.search(token_pattern, response.text)
        if found is not None:
            token = found.group(1)
            LOGGER.info('successfully get  csrfmiddlewaretoken: [%s]' % token)
    except Exception as e:
        LOGGER.exception(e)
    return token
예제 #6
0
def process_file(event, context):
    """Split an uploaded WAV into silence-delimited MP3 chunks on S3.

    Reads the bucket and object key from the first record of *event*.
    Keys whose first path segment is ``splits`` are ignored (presumably so
    the function does not reprocess its own output - confirm). Any other
    object is downloaded to ``/tmp``, cut with ``split_on_silence``, padded
    with 200 ms of silence on both sides, passed through
    ``match_target_amplitude(..., -20.0)`` and uploaded to the
    ``datasets-masters-2020`` bucket as ``splits/<key stem>/chunk_<i>.mp3``.

    :param event: S3 notification payload; only ``Records[0]`` is used.
    :param context: unused (presumably the AWS Lambda context object).
    :return: ``None`` on success or when there is nothing to do,
        ``("Error", 500)`` when anything raises (the exception is logged).
    """
    # local imports: the rest of the file may not import these
    import contextlib
    import os
    from urllib.parse import unquote_plus

    try:
        s3_info = event['Records'][0]['s3']
        bucket = s3_info['bucket']['name']
        # S3 event notifications deliver the object key URL-encoded
        # (a space arrives as "+"), so decode it before using it.
        key = unquote_plus(s3_info['object']['key'])
        if key.split('/')[0] == 'splits':
            logger.debug('Nothing to do here')
            return

        s3_client = boto3.client('s3')
        # Use only the last key segment locally: /tmp has no directory
        # matching the key's prefix, so "/tmp/<prefix>/<name>" cannot be
        # written.
        file_loc = '/tmp/' + os.path.basename(key)
        logger.debug(file_loc)
        logger.debug(key)
        logger.debug(bucket)

        temp_files = [file_loc]
        try:
            # Download this file to writable tmp space.
            s3_client.download_file(bucket, key, file_loc)
            song = AudioSegment.from_wav(file_loc)

            chunks = split_on_silence(
                song,
                min_silence_len=1000,
                # anything 16 dB below the clip's own dBFS counts as silence
                silence_thresh=song.dBFS - 16,
                # keep 200 ms of leading/trailing silence
                keep_silence=200)
            logger.debug(chunks)

            # loop invariants, computed once
            silence_chunk = AudioSegment.silent(duration=200)
            split_prefix = key.split('.')[0]
            for i, chunk in enumerate(chunks):
                audio_chunk = silence_chunk + chunk + silence_chunk
                normalized_chunk = match_target_amplitude(audio_chunk, -20.0)
                chunk_path = "/tmp/chunk{0}.mp3".format(i)
                logger.debug("Exporting chunk{0}.mp3.".format(i))
                temp_files.append(chunk_path)
                normalized_chunk.export(chunk_path,
                                        bitrate="320k",
                                        format="mp3")
                s3_client.upload_file(
                    chunk_path, 'datasets-masters-2020',
                    "splits/{0}/chunk_{1}.mp3".format(split_prefix, i))
        finally:
            # remove the temporary files, ignoring any that were never
            # created because an earlier step failed
            for path in temp_files:
                with contextlib.suppress(OSError):
                    os.remove(path)
        return

    except Exception as e:
        logger.exception(e)
        return "Error", 500
예제 #7
0
async def data_extraction_for_phone(html):
    """Extract title, target url and descriptive text from one result node.

    :param html: parsed element supporting ``html['data-log']``, ``find``
        and ``findAll`` (presumably a BeautifulSoup tag - confirm).
    :return: a dict with the keys ``title``, ``url`` and ``basic_mess``
        (``basic_mess`` is ``None`` when no matching nodes exist), or
        ``None`` when the entry carries no ``mu`` url or anything raises
        (the exception is logged).
    """
    try:
        # "async with" is the supported form of async_timeout; the plain
        # "with" form is deprecated. Keeping it inside the try means a
        # failure to enter the context is logged like any other error.
        async with async_timeout.timeout(10):
            # Get the target url from the node's data-log attribute.
            # NOTE(review): eval() executes whatever expression the
            # attribute holds. If this markup comes from an untrusted
            # page, switch to a literal-only parser such as
            # ast.literal_eval or json.loads once the format is confirmed.
            data_log = eval(html['data-log'])
            url = data_log.get('mu', None)
            if not url:
                return None
            # Get title
            title = html.find('h3').get_text()
            # Get author and update_time (option)
            novel_mess = html.findAll(class_='c-gap-right-large')
            basic_mess = [i.get_text()
                          for i in novel_mess] if novel_mess else None
            return {'title': title, 'url': url, 'basic_mess': basic_mess}
    except Exception as e:
        LOGGER.exception(e)
        return None
예제 #8
0
 async def get_photo_url(self, loop) -> bool:
     """
     Download the photo or video referenced by the page at ``self.single_url``.

     The media type is read from the page's ``og:type`` meta tag, the media
     address from ``og:video`` or ``og:image``. The file is written to
     ``self._dir`` under a name made of the last 25 characters of the media
     url, with ``/`` replaced by ``-``.

     :param loop: unused; kept for existing callers.
     :return: True when the file already exists or was written, False
         when a page or media fetch fails, a meta tag is missing, or
         writing raises (the exception is logged).
     """
     async with aiohttp.ClientSession(loop=None) as client:
         # the sleep must be awaited, otherwise the coroutine is created
         # and discarded and no pause happens
         await asyncio.sleep(1)
         html = await self.fetch(client=client, url=self.single_url)
         if not html:
             # explicit False instead of falling off the end with None
             return False

         page = str(html)
         preg_type = r"<meta\s*property=\"og:type\"\s*content=\"(.*?)\"\s*/>"
         media_type = re.findall(preg_type, page)
         if not media_type:
             return False

         if media_type[0] == "video":
             preg_url = r"<meta\s*property=\"og:video\"\s*content=\"(.*?)\"\s*/>"
         else:
             preg_url = r"<meta\s*property=\"og:image\"\s*content=\"(.*?)\"\s*/>"
         target_url_res = re.findall(preg_url, page)
         if not target_url_res:
             return False

         target_url = target_url_res[0]
         target_name = target_url[-25:].replace('/', '-')
         target_path = self._dir + target_name
         if os.path.exists(target_path):
             # already downloaded
             return True

         await asyncio.sleep(random.randint(1, 2))
         target_result = await self.fetch(client=client, url=target_url)
         if not target_result:
             return False

         LOGGER.info("Downloading {target_url}".format(
             target_url=target_url))
         try:
             with open(target_path, 'wb') as file:
                 file.write(target_result)
             LOGGER.info(
                 'File downloaded successfully in {dir}'
                 .format(dir=target_path))
             return True
         except Exception as e:
             LOGGER.exception(e)
             return False
예제 #9
0
 async def fetch(self, client, url) -> bytes:
     """
     Fetch *url* with a random user agent and return the raw body.

     :param client: aiohttp client
     :param url: request url
     :return: the bytes from ``response.read()``, or ``None`` on a non-200
         status, a timeout or any other error (despite the ``bytes``
         annotation)
     """
     try:
         # The timeout sits inside the try so the TimeoutError it raises is
         # handled below instead of escaping to the caller. "async with" is
         # the supported form; the plain "with" form is deprecated.
         async with async_timeout.timeout(10):
             headers = {'user-agent': self.get_random_user_agent()}
             # an empty/falsy PROXIES setting means "no proxy"
             proxy = PROXIES if PROXIES else None
             async with client.get(url, headers=headers,
                                   proxy=proxy) as response:
                 # explicit check: an assert disappears under "python -O"
                 if response.status != 200:
                     LOGGER.warning('Unexpected status %s for %s',
                                    response.status, url)
                     return None
                 LOGGER.info('Task url: {}'.format(response.url))
                 return await response.read()
     except Exception as e:
         LOGGER.exception(e)
         return None
예제 #10
0
async def get_real_url(client, url):
    """Follow redirects from *url* and return the address finally reached.

    :param client: session object whose ``get`` returns an async context
        manager (presumably an ``aiohttp.ClientSession`` - confirm).
    :param url: address to resolve.
    :return: ``response.url`` of the final response, or ``None`` when it is
        empty, the status is not 200, the request times out or anything
        else raises.
    """
    try:
        # The timeout sits inside the try so the TimeoutError it raises is
        # handled below instead of escaping to the caller. "async with" is
        # the supported form; the plain "with" form is deprecated.
        async with async_timeout.timeout(10):
            headers = {'user-agent': get_random_user_agent()}
            async with client.get(url, headers=headers,
                                  allow_redirects=True) as response:
                # explicit check: an assert disappears under "python -O"
                if response.status != 200:
                    LOGGER.warning('Unexpected status %s for %s',
                                   response.status, url)
                    return None
                LOGGER.info('Parse url: {}'.format(response.url))
                return response.url if response.url else None
    except Exception as e:
        LOGGER.exception(e)
        return None
예제 #11
0
    def Cold_boot(self, url, pause=2):
        """Fetch *url* after a pause and return the page parsed with lxml.

        The request uses a random user agent and ``self.proxies``, does not
        follow redirects and skips certificate verification. The body is
        decoded with the encoding reported by ``cchardet.detect``.

        :param url: address to request.
        :param pause: seconds to sleep before the request.
        :return: a ``BeautifulSoup`` object, or ``None`` when anything
            raises (the exception is logged).
        """
        time.sleep(pause)
        request_headers = {'user-agent': self.get_random_user_agent()}
        try:
            # certificate checks are off below, so silence the warning
            # urllib3 would otherwise emit
            urllib3_pkg = requests.packages.urllib3
            urllib3_pkg.disable_warnings(
                urllib3_pkg.exceptions.InsecureRequestWarning)
            response = requests.get(url=url,
                                    proxies=self.proxies,
                                    headers=request_headers,
                                    allow_redirects=False,
                                    verify=False,
                                    timeout=30)
            LOGGER.info(url)
            raw_body = response.content
            detected = cchardet.detect(raw_body)
            decoded_body = raw_body.decode(detected['encoding'])
            return BeautifulSoup(decoded_body, "lxml")
        except Exception as e:
            LOGGER.exception(e)
            return None